xref: /linux/kernel/trace/trace.c (revision 744fab2d9ff9177a27647c3710e86d43f2efe68c)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/cleanup.h>
30 #include <linux/vmalloc.h>
31 #include <linux/ftrace.h>
32 #include <linux/module.h>
33 #include <linux/percpu.h>
34 #include <linux/splice.h>
35 #include <linux/kdebug.h>
36 #include <linux/string.h>
37 #include <linux/mount.h>
38 #include <linux/rwsem.h>
39 #include <linux/slab.h>
40 #include <linux/ctype.h>
41 #include <linux/init.h>
42 #include <linux/panic_notifier.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
54 
55 #include "trace.h"
56 #include "trace_output.h"
57 
58 #ifdef CONFIG_FTRACE_STARTUP_TEST
59 /*
60  * We need to change this state when a selftest is running.
61  * A selftest will lurk into the ring-buffer to count the
62  * entries inserted during the selftest although some concurrent
63  * insertions into the ring-buffer such as trace_printk could occurred
64  * at the same time, giving false positive or negative results.
65  */
66 static bool __read_mostly tracing_selftest_running;
67 
68 /*
69  * If boot-time tracing including tracers/events via kernel cmdline
70  * is running, we do not want to run SELFTEST.
71  */
72 bool __read_mostly tracing_selftest_disabled;
73 
disable_tracing_selftest(const char * reason)74 void __init disable_tracing_selftest(const char *reason)
75 {
76 	if (!tracing_selftest_disabled) {
77 		tracing_selftest_disabled = true;
78 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
79 	}
80 }
81 #else
82 #define tracing_selftest_running	0
83 #define tracing_selftest_disabled	0
84 #endif
85 
86 /* Pipe tracepoints to printk */
87 static struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static bool tracepoint_printk_stop_on_boot __initdata;
90 static bool traceoff_after_boot __initdata;
91 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
92 
93 /* For tracers that don't implement custom flags */
94 static struct tracer_opt dummy_tracer_opt[] = {
95 	{ }
96 };
97 
98 static int
dummy_set_flag(struct trace_array * tr,u32 old_flags,u32 bit,int set)99 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
100 {
101 	return 0;
102 }
103 
104 /*
105  * To prevent the comm cache from being overwritten when no
106  * tracing is active, only save the comm when a trace event
107  * occurred.
108  */
109 DEFINE_PER_CPU(bool, trace_taskinfo_save);
110 
111 /*
112  * Kill all tracing for good (never come back).
113  * It is initialized to 1 but will turn to zero if the initialization
114  * of the tracer is successful. But that is the only place that sets
115  * this back to zero.
116  */
117 static int tracing_disabled = 1;
118 
119 cpumask_var_t __read_mostly	tracing_buffer_mask;
120 
121 /*
122  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
123  *
124  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
125  * is set, then ftrace_dump is called. This will output the contents
126  * of the ftrace buffers to the console.  This is very useful for
127  * capturing traces that lead to crashes and outputing it to a
128  * serial console.
129  *
130  * It is default off, but you can enable it with either specifying
131  * "ftrace_dump_on_oops" in the kernel command line, or setting
132  * /proc/sys/kernel/ftrace_dump_on_oops
133  * Set 1 if you want to dump buffers of all CPUs
134  * Set 2 if you want to dump the buffer of the CPU that triggered oops
135  * Set instance name if you want to dump the specific trace instance
136  * Multiple instance dump is also supported, and instances are seperated
137  * by commas.
138  */
139 /* Set to string format zero to disable by default */
140 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
141 
142 /* When set, tracing will stop when a WARN*() is hit */
143 int __disable_trace_on_warning;
144 
145 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
146 /* Map of enums to their values, for "eval_map" file */
147 struct trace_eval_map_head {
148 	struct module			*mod;
149 	unsigned long			length;
150 };
151 
152 union trace_eval_map_item;
153 
154 struct trace_eval_map_tail {
155 	/*
156 	 * "end" is first and points to NULL as it must be different
157 	 * than "mod" or "eval_string"
158 	 */
159 	union trace_eval_map_item	*next;
160 	const char			*end;	/* points to NULL */
161 };
162 
163 static DEFINE_MUTEX(trace_eval_mutex);
164 
165 /*
166  * The trace_eval_maps are saved in an array with two extra elements,
167  * one at the beginning, and one at the end. The beginning item contains
168  * the count of the saved maps (head.length), and the module they
169  * belong to if not built in (head.mod). The ending item contains a
170  * pointer to the next array of saved eval_map items.
171  */
172 union trace_eval_map_item {
173 	struct trace_eval_map		map;
174 	struct trace_eval_map_head	head;
175 	struct trace_eval_map_tail	tail;
176 };
177 
178 static union trace_eval_map_item *trace_eval_maps;
179 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
180 
181 int tracing_set_tracer(struct trace_array *tr, const char *buf);
182 static void ftrace_trace_userstack(struct trace_array *tr,
183 				   struct trace_buffer *buffer,
184 				   unsigned int trace_ctx);
185 
186 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
187 static char *default_bootup_tracer;
188 
189 static bool allocate_snapshot;
190 static bool snapshot_at_boot;
191 
192 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
193 static int boot_instance_index;
194 
195 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
196 static int boot_snapshot_index;
197 
set_cmdline_ftrace(char * str)198 static int __init set_cmdline_ftrace(char *str)
199 {
200 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
201 	default_bootup_tracer = bootup_tracer_buf;
202 	/* We are using ftrace early, expand it */
203 	trace_set_ring_buffer_expanded(NULL);
204 	return 1;
205 }
206 __setup("ftrace=", set_cmdline_ftrace);
207 
ftrace_dump_on_oops_enabled(void)208 int ftrace_dump_on_oops_enabled(void)
209 {
210 	if (!strcmp("0", ftrace_dump_on_oops))
211 		return 0;
212 	else
213 		return 1;
214 }
215 
set_ftrace_dump_on_oops(char * str)216 static int __init set_ftrace_dump_on_oops(char *str)
217 {
218 	if (!*str) {
219 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
220 		return 1;
221 	}
222 
223 	if (*str == ',') {
224 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
225 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
226 		return 1;
227 	}
228 
229 	if (*str++ == '=') {
230 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
231 		return 1;
232 	}
233 
234 	return 0;
235 }
236 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
237 
stop_trace_on_warning(char * str)238 static int __init stop_trace_on_warning(char *str)
239 {
240 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
241 		__disable_trace_on_warning = 1;
242 	return 1;
243 }
244 __setup("traceoff_on_warning", stop_trace_on_warning);
245 
boot_alloc_snapshot(char * str)246 static int __init boot_alloc_snapshot(char *str)
247 {
248 	char *slot = boot_snapshot_info + boot_snapshot_index;
249 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
250 	int ret;
251 
252 	if (str[0] == '=') {
253 		str++;
254 		if (strlen(str) >= left)
255 			return -1;
256 
257 		ret = snprintf(slot, left, "%s\t", str);
258 		boot_snapshot_index += ret;
259 	} else {
260 		allocate_snapshot = true;
261 		/* We also need the main ring buffer expanded */
262 		trace_set_ring_buffer_expanded(NULL);
263 	}
264 	return 1;
265 }
266 __setup("alloc_snapshot", boot_alloc_snapshot);
267 
268 
boot_snapshot(char * str)269 static int __init boot_snapshot(char *str)
270 {
271 	snapshot_at_boot = true;
272 	boot_alloc_snapshot(str);
273 	return 1;
274 }
275 __setup("ftrace_boot_snapshot", boot_snapshot);
276 
277 
boot_instance(char * str)278 static int __init boot_instance(char *str)
279 {
280 	char *slot = boot_instance_info + boot_instance_index;
281 	int left = sizeof(boot_instance_info) - boot_instance_index;
282 	int ret;
283 
284 	if (strlen(str) >= left)
285 		return -1;
286 
287 	ret = snprintf(slot, left, "%s\t", str);
288 	boot_instance_index += ret;
289 
290 	return 1;
291 }
292 __setup("trace_instance=", boot_instance);
293 
294 
295 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
296 
set_trace_boot_options(char * str)297 static int __init set_trace_boot_options(char *str)
298 {
299 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
300 	return 1;
301 }
302 __setup("trace_options=", set_trace_boot_options);
303 
304 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
305 static char *trace_boot_clock __initdata;
306 
set_trace_boot_clock(char * str)307 static int __init set_trace_boot_clock(char *str)
308 {
309 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
310 	trace_boot_clock = trace_boot_clock_buf;
311 	return 1;
312 }
313 __setup("trace_clock=", set_trace_boot_clock);
314 
set_tracepoint_printk(char * str)315 static int __init set_tracepoint_printk(char *str)
316 {
317 	/* Ignore the "tp_printk_stop_on_boot" param */
318 	if (*str == '_')
319 		return 0;
320 
321 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
322 		tracepoint_printk = 1;
323 	return 1;
324 }
325 __setup("tp_printk", set_tracepoint_printk);
326 
set_tracepoint_printk_stop(char * str)327 static int __init set_tracepoint_printk_stop(char *str)
328 {
329 	tracepoint_printk_stop_on_boot = true;
330 	return 1;
331 }
332 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
333 
set_traceoff_after_boot(char * str)334 static int __init set_traceoff_after_boot(char *str)
335 {
336 	traceoff_after_boot = true;
337 	return 1;
338 }
339 __setup("traceoff_after_boot", set_traceoff_after_boot);
340 
ns2usecs(u64 nsec)341 unsigned long long ns2usecs(u64 nsec)
342 {
343 	nsec += 500;
344 	do_div(nsec, 1000);
345 	return nsec;
346 }
347 
348 static void
trace_process_export(struct trace_export * export,struct ring_buffer_event * event,int flag)349 trace_process_export(struct trace_export *export,
350 	       struct ring_buffer_event *event, int flag)
351 {
352 	struct trace_entry *entry;
353 	unsigned int size = 0;
354 
355 	if (export->flags & flag) {
356 		entry = ring_buffer_event_data(event);
357 		size = ring_buffer_event_length(event);
358 		export->write(export, entry, size);
359 	}
360 }
361 
362 static DEFINE_MUTEX(ftrace_export_lock);
363 
364 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
365 
366 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
367 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
368 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
369 
ftrace_exports_enable(struct trace_export * export)370 static inline void ftrace_exports_enable(struct trace_export *export)
371 {
372 	if (export->flags & TRACE_EXPORT_FUNCTION)
373 		static_branch_inc(&trace_function_exports_enabled);
374 
375 	if (export->flags & TRACE_EXPORT_EVENT)
376 		static_branch_inc(&trace_event_exports_enabled);
377 
378 	if (export->flags & TRACE_EXPORT_MARKER)
379 		static_branch_inc(&trace_marker_exports_enabled);
380 }
381 
ftrace_exports_disable(struct trace_export * export)382 static inline void ftrace_exports_disable(struct trace_export *export)
383 {
384 	if (export->flags & TRACE_EXPORT_FUNCTION)
385 		static_branch_dec(&trace_function_exports_enabled);
386 
387 	if (export->flags & TRACE_EXPORT_EVENT)
388 		static_branch_dec(&trace_event_exports_enabled);
389 
390 	if (export->flags & TRACE_EXPORT_MARKER)
391 		static_branch_dec(&trace_marker_exports_enabled);
392 }
393 
ftrace_exports(struct ring_buffer_event * event,int flag)394 static void ftrace_exports(struct ring_buffer_event *event, int flag)
395 {
396 	struct trace_export *export;
397 
398 	preempt_disable_notrace();
399 
400 	export = rcu_dereference_raw_check(ftrace_exports_list);
401 	while (export) {
402 		trace_process_export(export, event, flag);
403 		export = rcu_dereference_raw_check(export->next);
404 	}
405 
406 	preempt_enable_notrace();
407 }
408 
409 static inline void
add_trace_export(struct trace_export ** list,struct trace_export * export)410 add_trace_export(struct trace_export **list, struct trace_export *export)
411 {
412 	rcu_assign_pointer(export->next, *list);
413 	/*
414 	 * We are entering export into the list but another
415 	 * CPU might be walking that list. We need to make sure
416 	 * the export->next pointer is valid before another CPU sees
417 	 * the export pointer included into the list.
418 	 */
419 	rcu_assign_pointer(*list, export);
420 }
421 
422 static inline int
rm_trace_export(struct trace_export ** list,struct trace_export * export)423 rm_trace_export(struct trace_export **list, struct trace_export *export)
424 {
425 	struct trace_export **p;
426 
427 	for (p = list; *p != NULL; p = &(*p)->next)
428 		if (*p == export)
429 			break;
430 
431 	if (*p != export)
432 		return -1;
433 
434 	rcu_assign_pointer(*p, (*p)->next);
435 
436 	return 0;
437 }
438 
439 static inline void
add_ftrace_export(struct trace_export ** list,struct trace_export * export)440 add_ftrace_export(struct trace_export **list, struct trace_export *export)
441 {
442 	ftrace_exports_enable(export);
443 
444 	add_trace_export(list, export);
445 }
446 
447 static inline int
rm_ftrace_export(struct trace_export ** list,struct trace_export * export)448 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
449 {
450 	int ret;
451 
452 	ret = rm_trace_export(list, export);
453 	ftrace_exports_disable(export);
454 
455 	return ret;
456 }
457 
register_ftrace_export(struct trace_export * export)458 int register_ftrace_export(struct trace_export *export)
459 {
460 	if (WARN_ON_ONCE(!export->write))
461 		return -1;
462 
463 	mutex_lock(&ftrace_export_lock);
464 
465 	add_ftrace_export(&ftrace_exports_list, export);
466 
467 	mutex_unlock(&ftrace_export_lock);
468 
469 	return 0;
470 }
471 EXPORT_SYMBOL_GPL(register_ftrace_export);
472 
unregister_ftrace_export(struct trace_export * export)473 int unregister_ftrace_export(struct trace_export *export)
474 {
475 	int ret;
476 
477 	mutex_lock(&ftrace_export_lock);
478 
479 	ret = rm_ftrace_export(&ftrace_exports_list, export);
480 
481 	mutex_unlock(&ftrace_export_lock);
482 
483 	return ret;
484 }
485 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
486 
487 /* trace_flags holds trace_options default values */
488 #define TRACE_DEFAULT_FLAGS						\
489 	(FUNCTION_DEFAULT_FLAGS |					\
490 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
491 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
492 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
493 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
494 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
495 
496 /* trace_options that are only supported by global_trace */
497 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
498 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
499 
500 /* trace_flags that are default zero for instances */
501 #define ZEROED_TRACE_FLAGS \
502 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
503 
504 /*
505  * The global_trace is the descriptor that holds the top-level tracing
506  * buffers for the live tracing.
507  */
508 static struct trace_array global_trace = {
509 	.trace_flags = TRACE_DEFAULT_FLAGS,
510 };
511 
512 static struct trace_array *printk_trace = &global_trace;
513 
printk_binsafe(struct trace_array * tr)514 static __always_inline bool printk_binsafe(struct trace_array *tr)
515 {
516 	/*
517 	 * The binary format of traceprintk can cause a crash if used
518 	 * by a buffer from another boot. Force the use of the
519 	 * non binary version of trace_printk if the trace_printk
520 	 * buffer is a boot mapped ring buffer.
521 	 */
522 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
523 }
524 
update_printk_trace(struct trace_array * tr)525 static void update_printk_trace(struct trace_array *tr)
526 {
527 	if (printk_trace == tr)
528 		return;
529 
530 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
531 	printk_trace = tr;
532 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
533 }
534 
trace_set_ring_buffer_expanded(struct trace_array * tr)535 void trace_set_ring_buffer_expanded(struct trace_array *tr)
536 {
537 	if (!tr)
538 		tr = &global_trace;
539 	tr->ring_buffer_expanded = true;
540 }
541 
542 LIST_HEAD(ftrace_trace_arrays);
543 
trace_array_get(struct trace_array * this_tr)544 int trace_array_get(struct trace_array *this_tr)
545 {
546 	struct trace_array *tr;
547 
548 	guard(mutex)(&trace_types_lock);
549 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
550 		if (tr == this_tr) {
551 			tr->ref++;
552 			return 0;
553 		}
554 	}
555 
556 	return -ENODEV;
557 }
558 
__trace_array_put(struct trace_array * this_tr)559 static void __trace_array_put(struct trace_array *this_tr)
560 {
561 	WARN_ON(!this_tr->ref);
562 	this_tr->ref--;
563 }
564 
565 /**
566  * trace_array_put - Decrement the reference counter for this trace array.
567  * @this_tr : pointer to the trace array
568  *
569  * NOTE: Use this when we no longer need the trace array returned by
570  * trace_array_get_by_name(). This ensures the trace array can be later
571  * destroyed.
572  *
573  */
trace_array_put(struct trace_array * this_tr)574 void trace_array_put(struct trace_array *this_tr)
575 {
576 	if (!this_tr)
577 		return;
578 
579 	mutex_lock(&trace_types_lock);
580 	__trace_array_put(this_tr);
581 	mutex_unlock(&trace_types_lock);
582 }
583 EXPORT_SYMBOL_GPL(trace_array_put);
584 
tracing_check_open_get_tr(struct trace_array * tr)585 int tracing_check_open_get_tr(struct trace_array *tr)
586 {
587 	int ret;
588 
589 	ret = security_locked_down(LOCKDOWN_TRACEFS);
590 	if (ret)
591 		return ret;
592 
593 	if (tracing_disabled)
594 		return -ENODEV;
595 
596 	if (tr && trace_array_get(tr) < 0)
597 		return -ENODEV;
598 
599 	return 0;
600 }
601 
602 /**
603  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
604  * @filtered_pids: The list of pids to check
605  * @search_pid: The PID to find in @filtered_pids
606  *
607  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
608  */
609 bool
trace_find_filtered_pid(struct trace_pid_list * filtered_pids,pid_t search_pid)610 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
611 {
612 	return trace_pid_list_is_set(filtered_pids, search_pid);
613 }
614 
615 /**
616  * trace_ignore_this_task - should a task be ignored for tracing
617  * @filtered_pids: The list of pids to check
618  * @filtered_no_pids: The list of pids not to be traced
619  * @task: The task that should be ignored if not filtered
620  *
621  * Checks if @task should be traced or not from @filtered_pids.
622  * Returns true if @task should *NOT* be traced.
623  * Returns false if @task should be traced.
624  */
625 bool
trace_ignore_this_task(struct trace_pid_list * filtered_pids,struct trace_pid_list * filtered_no_pids,struct task_struct * task)626 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
627 		       struct trace_pid_list *filtered_no_pids,
628 		       struct task_struct *task)
629 {
630 	/*
631 	 * If filtered_no_pids is not empty, and the task's pid is listed
632 	 * in filtered_no_pids, then return true.
633 	 * Otherwise, if filtered_pids is empty, that means we can
634 	 * trace all tasks. If it has content, then only trace pids
635 	 * within filtered_pids.
636 	 */
637 
638 	return (filtered_pids &&
639 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
640 		(filtered_no_pids &&
641 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
642 }
643 
644 /**
645  * trace_filter_add_remove_task - Add or remove a task from a pid_list
646  * @pid_list: The list to modify
647  * @self: The current task for fork or NULL for exit
648  * @task: The task to add or remove
649  *
650  * If adding a task, if @self is defined, the task is only added if @self
651  * is also included in @pid_list. This happens on fork and tasks should
652  * only be added when the parent is listed. If @self is NULL, then the
653  * @task pid will be removed from the list, which would happen on exit
654  * of a task.
655  */
trace_filter_add_remove_task(struct trace_pid_list * pid_list,struct task_struct * self,struct task_struct * task)656 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
657 				  struct task_struct *self,
658 				  struct task_struct *task)
659 {
660 	if (!pid_list)
661 		return;
662 
663 	/* For forks, we only add if the forking task is listed */
664 	if (self) {
665 		if (!trace_find_filtered_pid(pid_list, self->pid))
666 			return;
667 	}
668 
669 	/* "self" is set for forks, and NULL for exits */
670 	if (self)
671 		trace_pid_list_set(pid_list, task->pid);
672 	else
673 		trace_pid_list_clear(pid_list, task->pid);
674 }
675 
676 /**
677  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
678  * @pid_list: The pid list to show
679  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
680  * @pos: The position of the file
681  *
682  * This is used by the seq_file "next" operation to iterate the pids
683  * listed in a trace_pid_list structure.
684  *
685  * Returns the pid+1 as we want to display pid of zero, but NULL would
686  * stop the iteration.
687  */
trace_pid_next(struct trace_pid_list * pid_list,void * v,loff_t * pos)688 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
689 {
690 	long pid = (unsigned long)v;
691 	unsigned int next;
692 
693 	(*pos)++;
694 
695 	/* pid already is +1 of the actual previous bit */
696 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
697 		return NULL;
698 
699 	pid = next;
700 
701 	/* Return pid + 1 to allow zero to be represented */
702 	return (void *)(pid + 1);
703 }
704 
705 /**
706  * trace_pid_start - Used for seq_file to start reading pid lists
707  * @pid_list: The pid list to show
708  * @pos: The position of the file
709  *
710  * This is used by seq_file "start" operation to start the iteration
711  * of listing pids.
712  *
713  * Returns the pid+1 as we want to display pid of zero, but NULL would
714  * stop the iteration.
715  */
trace_pid_start(struct trace_pid_list * pid_list,loff_t * pos)716 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
717 {
718 	unsigned long pid;
719 	unsigned int first;
720 	loff_t l = 0;
721 
722 	if (trace_pid_list_first(pid_list, &first) < 0)
723 		return NULL;
724 
725 	pid = first;
726 
727 	/* Return pid + 1 so that zero can be the exit value */
728 	for (pid++; pid && l < *pos;
729 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
730 		;
731 	return (void *)pid;
732 }
733 
734 /**
735  * trace_pid_show - show the current pid in seq_file processing
736  * @m: The seq_file structure to write into
737  * @v: A void pointer of the pid (+1) value to display
738  *
739  * Can be directly used by seq_file operations to display the current
740  * pid value.
741  */
trace_pid_show(struct seq_file * m,void * v)742 int trace_pid_show(struct seq_file *m, void *v)
743 {
744 	unsigned long pid = (unsigned long)v - 1;
745 
746 	seq_printf(m, "%lu\n", pid);
747 	return 0;
748 }
749 
750 /* 128 should be much more than enough */
751 #define PID_BUF_SIZE		127
752 
trace_pid_write(struct trace_pid_list * filtered_pids,struct trace_pid_list ** new_pid_list,const char __user * ubuf,size_t cnt)753 int trace_pid_write(struct trace_pid_list *filtered_pids,
754 		    struct trace_pid_list **new_pid_list,
755 		    const char __user *ubuf, size_t cnt)
756 {
757 	struct trace_pid_list *pid_list;
758 	struct trace_parser parser;
759 	unsigned long val;
760 	int nr_pids = 0;
761 	ssize_t read = 0;
762 	ssize_t ret;
763 	loff_t pos;
764 	pid_t pid;
765 
766 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
767 		return -ENOMEM;
768 
769 	/*
770 	 * Always recreate a new array. The write is an all or nothing
771 	 * operation. Always create a new array when adding new pids by
772 	 * the user. If the operation fails, then the current list is
773 	 * not modified.
774 	 */
775 	pid_list = trace_pid_list_alloc();
776 	if (!pid_list) {
777 		trace_parser_put(&parser);
778 		return -ENOMEM;
779 	}
780 
781 	if (filtered_pids) {
782 		/* copy the current bits to the new max */
783 		ret = trace_pid_list_first(filtered_pids, &pid);
784 		while (!ret) {
785 			trace_pid_list_set(pid_list, pid);
786 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
787 			nr_pids++;
788 		}
789 	}
790 
791 	ret = 0;
792 	while (cnt > 0) {
793 
794 		pos = 0;
795 
796 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
797 		if (ret < 0)
798 			break;
799 
800 		read += ret;
801 		ubuf += ret;
802 		cnt -= ret;
803 
804 		if (!trace_parser_loaded(&parser))
805 			break;
806 
807 		ret = -EINVAL;
808 		if (kstrtoul(parser.buffer, 0, &val))
809 			break;
810 
811 		pid = (pid_t)val;
812 
813 		if (trace_pid_list_set(pid_list, pid) < 0) {
814 			ret = -1;
815 			break;
816 		}
817 		nr_pids++;
818 
819 		trace_parser_clear(&parser);
820 		ret = 0;
821 	}
822 	trace_parser_put(&parser);
823 
824 	if (ret < 0) {
825 		trace_pid_list_free(pid_list);
826 		return ret;
827 	}
828 
829 	if (!nr_pids) {
830 		/* Cleared the list of pids */
831 		trace_pid_list_free(pid_list);
832 		pid_list = NULL;
833 	}
834 
835 	*new_pid_list = pid_list;
836 
837 	return read;
838 }
839 
buffer_ftrace_now(struct array_buffer * buf,int cpu)840 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
841 {
842 	u64 ts;
843 
844 	/* Early boot up does not have a buffer yet */
845 	if (!buf->buffer)
846 		return trace_clock_local();
847 
848 	ts = ring_buffer_time_stamp(buf->buffer);
849 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
850 
851 	return ts;
852 }
853 
ftrace_now(int cpu)854 u64 ftrace_now(int cpu)
855 {
856 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
857 }
858 
859 /**
860  * tracing_is_enabled - Show if global_trace has been enabled
861  *
862  * Shows if the global trace has been enabled or not. It uses the
863  * mirror flag "buffer_disabled" to be used in fast paths such as for
864  * the irqsoff tracer. But it may be inaccurate due to races. If you
865  * need to know the accurate state, use tracing_is_on() which is a little
866  * slower, but accurate.
867  */
tracing_is_enabled(void)868 int tracing_is_enabled(void)
869 {
870 	/*
871 	 * For quick access (irqsoff uses this in fast path), just
872 	 * return the mirror variable of the state of the ring buffer.
873 	 * It's a little racy, but we don't really care.
874 	 */
875 	smp_rmb();
876 	return !global_trace.buffer_disabled;
877 }
878 
879 /*
880  * trace_buf_size is the size in bytes that is allocated
881  * for a buffer. Note, the number of bytes is always rounded
882  * to page size.
883  *
884  * This number is purposely set to a low number of 16384.
885  * If the dump on oops happens, it will be much appreciated
886  * to not have to wait for all that output. Anyway this can be
887  * boot time and run time configurable.
888  */
889 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
890 
891 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
892 
893 /* trace_types holds a link list of available tracers. */
894 static struct tracer		*trace_types __read_mostly;
895 
896 /*
897  * trace_types_lock is used to protect the trace_types list.
898  */
899 DEFINE_MUTEX(trace_types_lock);
900 
901 /*
902  * serialize the access of the ring buffer
903  *
904  * ring buffer serializes readers, but it is low level protection.
905  * The validity of the events (which returns by ring_buffer_peek() ..etc)
906  * are not protected by ring buffer.
907  *
908  * The content of events may become garbage if we allow other process consumes
909  * these events concurrently:
910  *   A) the page of the consumed events may become a normal page
911  *      (not reader page) in ring buffer, and this page will be rewritten
912  *      by events producer.
913  *   B) The page of the consumed events may become a page for splice_read,
914  *      and this page will be returned to system.
915  *
916  * These primitives allow multi process access to different cpu ring buffer
917  * concurrently.
918  *
919  * These primitives don't distinguish read-only and read-consume access.
920  * Multi read-only access are also serialized.
921  */
922 
923 #ifdef CONFIG_SMP
924 static DECLARE_RWSEM(all_cpu_access_lock);
925 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
926 
trace_access_lock(int cpu)927 static inline void trace_access_lock(int cpu)
928 {
929 	if (cpu == RING_BUFFER_ALL_CPUS) {
930 		/* gain it for accessing the whole ring buffer. */
931 		down_write(&all_cpu_access_lock);
932 	} else {
933 		/* gain it for accessing a cpu ring buffer. */
934 
935 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
936 		down_read(&all_cpu_access_lock);
937 
938 		/* Secondly block other access to this @cpu ring buffer. */
939 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
940 	}
941 }
942 
trace_access_unlock(int cpu)943 static inline void trace_access_unlock(int cpu)
944 {
945 	if (cpu == RING_BUFFER_ALL_CPUS) {
946 		up_write(&all_cpu_access_lock);
947 	} else {
948 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
949 		up_read(&all_cpu_access_lock);
950 	}
951 }
952 
trace_access_lock_init(void)953 static inline void trace_access_lock_init(void)
954 {
955 	int cpu;
956 
957 	for_each_possible_cpu(cpu)
958 		mutex_init(&per_cpu(cpu_access_lock, cpu));
959 }
960 
961 #else
962 
963 static DEFINE_MUTEX(access_lock);
964 
trace_access_lock(int cpu)965 static inline void trace_access_lock(int cpu)
966 {
967 	(void)cpu;
968 	mutex_lock(&access_lock);
969 }
970 
trace_access_unlock(int cpu)971 static inline void trace_access_unlock(int cpu)
972 {
973 	(void)cpu;
974 	mutex_unlock(&access_lock);
975 }
976 
trace_access_lock_init(void)977 static inline void trace_access_lock_init(void)
978 {
979 }
980 
981 #endif
982 
983 #ifdef CONFIG_STACKTRACE
984 static void __ftrace_trace_stack(struct trace_array *tr,
985 				 struct trace_buffer *buffer,
986 				 unsigned int trace_ctx,
987 				 int skip, struct pt_regs *regs);
988 static inline void ftrace_trace_stack(struct trace_array *tr,
989 				      struct trace_buffer *buffer,
990 				      unsigned int trace_ctx,
991 				      int skip, struct pt_regs *regs);
992 
993 #else
__ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)994 static inline void __ftrace_trace_stack(struct trace_array *tr,
995 					struct trace_buffer *buffer,
996 					unsigned int trace_ctx,
997 					int skip, struct pt_regs *regs)
998 {
999 }
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned long trace_ctx,int skip,struct pt_regs * regs)1000 static inline void ftrace_trace_stack(struct trace_array *tr,
1001 				      struct trace_buffer *buffer,
1002 				      unsigned long trace_ctx,
1003 				      int skip, struct pt_regs *regs)
1004 {
1005 }
1006 
1007 #endif
1008 
1009 static __always_inline void
trace_event_setup(struct ring_buffer_event * event,int type,unsigned int trace_ctx)1010 trace_event_setup(struct ring_buffer_event *event,
1011 		  int type, unsigned int trace_ctx)
1012 {
1013 	struct trace_entry *ent = ring_buffer_event_data(event);
1014 
1015 	tracing_generic_entry_update(ent, type, trace_ctx);
1016 }
1017 
1018 static __always_inline struct ring_buffer_event *
__trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)1019 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1020 			  int type,
1021 			  unsigned long len,
1022 			  unsigned int trace_ctx)
1023 {
1024 	struct ring_buffer_event *event;
1025 
1026 	event = ring_buffer_lock_reserve(buffer, len);
1027 	if (event != NULL)
1028 		trace_event_setup(event, type, trace_ctx);
1029 
1030 	return event;
1031 }
1032 
tracer_tracing_on(struct trace_array * tr)1033 void tracer_tracing_on(struct trace_array *tr)
1034 {
1035 	if (tr->array_buffer.buffer)
1036 		ring_buffer_record_on(tr->array_buffer.buffer);
1037 	/*
1038 	 * This flag is looked at when buffers haven't been allocated
1039 	 * yet, or by some tracers (like irqsoff), that just want to
1040 	 * know if the ring buffer has been disabled, but it can handle
1041 	 * races of where it gets disabled but we still do a record.
1042 	 * As the check is in the fast path of the tracers, it is more
1043 	 * important to be fast than accurate.
1044 	 */
1045 	tr->buffer_disabled = 0;
1046 	/* Make the flag seen by readers */
1047 	smp_wmb();
1048 }
1049 
1050 /**
1051  * tracing_on - enable tracing buffers
1052  *
1053  * This function enables tracing buffers that may have been
1054  * disabled with tracing_off.
1055  */
tracing_on(void)1056 void tracing_on(void)
1057 {
1058 	tracer_tracing_on(&global_trace);
1059 }
1060 EXPORT_SYMBOL_GPL(tracing_on);
1061 
1062 
1063 static __always_inline void
__buffer_unlock_commit(struct trace_buffer * buffer,struct ring_buffer_event * event)1064 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1065 {
1066 	__this_cpu_write(trace_taskinfo_save, true);
1067 
1068 	/* If this is the temp buffer, we need to commit fully */
1069 	if (this_cpu_read(trace_buffered_event) == event) {
1070 		/* Length is in event->array[0] */
1071 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1072 		/* Release the temp buffer */
1073 		this_cpu_dec(trace_buffered_event_cnt);
1074 		/* ring_buffer_unlock_commit() enables preemption */
1075 		preempt_enable_notrace();
1076 	} else
1077 		ring_buffer_unlock_commit(buffer);
1078 }
1079 
__trace_array_puts(struct trace_array * tr,unsigned long ip,const char * str,int size)1080 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1081 		       const char *str, int size)
1082 {
1083 	struct ring_buffer_event *event;
1084 	struct trace_buffer *buffer;
1085 	struct print_entry *entry;
1086 	unsigned int trace_ctx;
1087 	int alloc;
1088 
1089 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1090 		return 0;
1091 
1092 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1093 		return 0;
1094 
1095 	if (unlikely(tracing_disabled))
1096 		return 0;
1097 
1098 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1099 
1100 	trace_ctx = tracing_gen_ctx();
1101 	buffer = tr->array_buffer.buffer;
1102 	ring_buffer_nest_start(buffer);
1103 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1104 					    trace_ctx);
1105 	if (!event) {
1106 		size = 0;
1107 		goto out;
1108 	}
1109 
1110 	entry = ring_buffer_event_data(event);
1111 	entry->ip = ip;
1112 
1113 	memcpy(&entry->buf, str, size);
1114 
1115 	/* Add a newline if necessary */
1116 	if (entry->buf[size - 1] != '\n') {
1117 		entry->buf[size] = '\n';
1118 		entry->buf[size + 1] = '\0';
1119 	} else
1120 		entry->buf[size] = '\0';
1121 
1122 	__buffer_unlock_commit(buffer, event);
1123 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1124  out:
1125 	ring_buffer_nest_end(buffer);
1126 	return size;
1127 }
1128 EXPORT_SYMBOL_GPL(__trace_array_puts);
1129 
1130 /**
1131  * __trace_puts - write a constant string into the trace buffer.
1132  * @ip:	   The address of the caller
1133  * @str:   The constant string to write
1134  * @size:  The size of the string.
1135  */
__trace_puts(unsigned long ip,const char * str,int size)1136 int __trace_puts(unsigned long ip, const char *str, int size)
1137 {
1138 	return __trace_array_puts(printk_trace, ip, str, size);
1139 }
1140 EXPORT_SYMBOL_GPL(__trace_puts);
1141 
1142 /**
1143  * __trace_bputs - write the pointer to a constant string into trace buffer
1144  * @ip:	   The address of the caller
1145  * @str:   The constant string to write to the buffer to
1146  */
__trace_bputs(unsigned long ip,const char * str)1147 int __trace_bputs(unsigned long ip, const char *str)
1148 {
1149 	struct trace_array *tr = READ_ONCE(printk_trace);
1150 	struct ring_buffer_event *event;
1151 	struct trace_buffer *buffer;
1152 	struct bputs_entry *entry;
1153 	unsigned int trace_ctx;
1154 	int size = sizeof(struct bputs_entry);
1155 	int ret = 0;
1156 
1157 	if (!printk_binsafe(tr))
1158 		return __trace_puts(ip, str, strlen(str));
1159 
1160 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1161 		return 0;
1162 
1163 	if (unlikely(tracing_selftest_running || tracing_disabled))
1164 		return 0;
1165 
1166 	trace_ctx = tracing_gen_ctx();
1167 	buffer = tr->array_buffer.buffer;
1168 
1169 	ring_buffer_nest_start(buffer);
1170 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1171 					    trace_ctx);
1172 	if (!event)
1173 		goto out;
1174 
1175 	entry = ring_buffer_event_data(event);
1176 	entry->ip			= ip;
1177 	entry->str			= str;
1178 
1179 	__buffer_unlock_commit(buffer, event);
1180 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1181 
1182 	ret = 1;
1183  out:
1184 	ring_buffer_nest_end(buffer);
1185 	return ret;
1186 }
1187 EXPORT_SYMBOL_GPL(__trace_bputs);
1188 
1189 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_instance_cond(struct trace_array * tr,void * cond_data)1190 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1191 					   void *cond_data)
1192 {
1193 	struct tracer *tracer = tr->current_trace;
1194 	unsigned long flags;
1195 
1196 	if (in_nmi()) {
1197 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1198 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1199 		return;
1200 	}
1201 
1202 	if (!tr->allocated_snapshot) {
1203 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1204 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1205 		tracer_tracing_off(tr);
1206 		return;
1207 	}
1208 
1209 	/* Note, snapshot can not be used when the tracer uses it */
1210 	if (tracer->use_max_tr) {
1211 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1212 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1213 		return;
1214 	}
1215 
1216 	if (tr->mapped) {
1217 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1218 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1219 		return;
1220 	}
1221 
1222 	local_irq_save(flags);
1223 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1224 	local_irq_restore(flags);
1225 }
1226 
tracing_snapshot_instance(struct trace_array * tr)1227 void tracing_snapshot_instance(struct trace_array *tr)
1228 {
1229 	tracing_snapshot_instance_cond(tr, NULL);
1230 }
1231 
1232 /**
1233  * tracing_snapshot - take a snapshot of the current buffer.
1234  *
1235  * This causes a swap between the snapshot buffer and the current live
1236  * tracing buffer. You can use this to take snapshots of the live
1237  * trace when some condition is triggered, but continue to trace.
1238  *
1239  * Note, make sure to allocate the snapshot with either
1240  * a tracing_snapshot_alloc(), or by doing it manually
1241  * with: echo 1 > /sys/kernel/tracing/snapshot
1242  *
1243  * If the snapshot buffer is not allocated, it will stop tracing.
1244  * Basically making a permanent snapshot.
1245  */
tracing_snapshot(void)1246 void tracing_snapshot(void)
1247 {
1248 	struct trace_array *tr = &global_trace;
1249 
1250 	tracing_snapshot_instance(tr);
1251 }
1252 EXPORT_SYMBOL_GPL(tracing_snapshot);
1253 
1254 /**
1255  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1256  * @tr:		The tracing instance to snapshot
1257  * @cond_data:	The data to be tested conditionally, and possibly saved
1258  *
1259  * This is the same as tracing_snapshot() except that the snapshot is
1260  * conditional - the snapshot will only happen if the
1261  * cond_snapshot.update() implementation receiving the cond_data
1262  * returns true, which means that the trace array's cond_snapshot
1263  * update() operation used the cond_data to determine whether the
1264  * snapshot should be taken, and if it was, presumably saved it along
1265  * with the snapshot.
1266  */
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)1267 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1268 {
1269 	tracing_snapshot_instance_cond(tr, cond_data);
1270 }
1271 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1272 
1273 /**
1274  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1275  * @tr:		The tracing instance
1276  *
1277  * When the user enables a conditional snapshot using
1278  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1279  * with the snapshot.  This accessor is used to retrieve it.
1280  *
1281  * Should not be called from cond_snapshot.update(), since it takes
1282  * the tr->max_lock lock, which the code calling
1283  * cond_snapshot.update() has already done.
1284  *
1285  * Returns the cond_data associated with the trace array's snapshot.
1286  */
tracing_cond_snapshot_data(struct trace_array * tr)1287 void *tracing_cond_snapshot_data(struct trace_array *tr)
1288 {
1289 	void *cond_data = NULL;
1290 
1291 	local_irq_disable();
1292 	arch_spin_lock(&tr->max_lock);
1293 
1294 	if (tr->cond_snapshot)
1295 		cond_data = tr->cond_snapshot->cond_data;
1296 
1297 	arch_spin_unlock(&tr->max_lock);
1298 	local_irq_enable();
1299 
1300 	return cond_data;
1301 }
1302 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1303 
1304 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1305 					struct array_buffer *size_buf, int cpu_id);
1306 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1307 
tracing_alloc_snapshot_instance(struct trace_array * tr)1308 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1309 {
1310 	int order;
1311 	int ret;
1312 
1313 	if (!tr->allocated_snapshot) {
1314 
1315 		/* Make the snapshot buffer have the same order as main buffer */
1316 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1317 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1318 		if (ret < 0)
1319 			return ret;
1320 
1321 		/* allocate spare buffer */
1322 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1323 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1324 		if (ret < 0)
1325 			return ret;
1326 
1327 		tr->allocated_snapshot = true;
1328 	}
1329 
1330 	return 0;
1331 }
1332 
free_snapshot(struct trace_array * tr)1333 static void free_snapshot(struct trace_array *tr)
1334 {
1335 	/*
1336 	 * We don't free the ring buffer. instead, resize it because
1337 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1338 	 * we want preserve it.
1339 	 */
1340 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1341 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1342 	set_buffer_entries(&tr->max_buffer, 1);
1343 	tracing_reset_online_cpus(&tr->max_buffer);
1344 	tr->allocated_snapshot = false;
1345 }
1346 
tracing_arm_snapshot_locked(struct trace_array * tr)1347 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1348 {
1349 	int ret;
1350 
1351 	lockdep_assert_held(&trace_types_lock);
1352 
1353 	spin_lock(&tr->snapshot_trigger_lock);
1354 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1355 		spin_unlock(&tr->snapshot_trigger_lock);
1356 		return -EBUSY;
1357 	}
1358 
1359 	tr->snapshot++;
1360 	spin_unlock(&tr->snapshot_trigger_lock);
1361 
1362 	ret = tracing_alloc_snapshot_instance(tr);
1363 	if (ret) {
1364 		spin_lock(&tr->snapshot_trigger_lock);
1365 		tr->snapshot--;
1366 		spin_unlock(&tr->snapshot_trigger_lock);
1367 	}
1368 
1369 	return ret;
1370 }
1371 
tracing_arm_snapshot(struct trace_array * tr)1372 int tracing_arm_snapshot(struct trace_array *tr)
1373 {
1374 	int ret;
1375 
1376 	mutex_lock(&trace_types_lock);
1377 	ret = tracing_arm_snapshot_locked(tr);
1378 	mutex_unlock(&trace_types_lock);
1379 
1380 	return ret;
1381 }
1382 
tracing_disarm_snapshot(struct trace_array * tr)1383 void tracing_disarm_snapshot(struct trace_array *tr)
1384 {
1385 	spin_lock(&tr->snapshot_trigger_lock);
1386 	if (!WARN_ON(!tr->snapshot))
1387 		tr->snapshot--;
1388 	spin_unlock(&tr->snapshot_trigger_lock);
1389 }
1390 
1391 /**
1392  * tracing_alloc_snapshot - allocate snapshot buffer.
1393  *
1394  * This only allocates the snapshot buffer if it isn't already
1395  * allocated - it doesn't also take a snapshot.
1396  *
1397  * This is meant to be used in cases where the snapshot buffer needs
1398  * to be set up for events that can't sleep but need to be able to
1399  * trigger a snapshot.
1400  */
tracing_alloc_snapshot(void)1401 int tracing_alloc_snapshot(void)
1402 {
1403 	struct trace_array *tr = &global_trace;
1404 	int ret;
1405 
1406 	ret = tracing_alloc_snapshot_instance(tr);
1407 	WARN_ON(ret < 0);
1408 
1409 	return ret;
1410 }
1411 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1412 
1413 /**
1414  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1415  *
1416  * This is similar to tracing_snapshot(), but it will allocate the
1417  * snapshot buffer if it isn't already allocated. Use this only
1418  * where it is safe to sleep, as the allocation may sleep.
1419  *
1420  * This causes a swap between the snapshot buffer and the current live
1421  * tracing buffer. You can use this to take snapshots of the live
1422  * trace when some condition is triggered, but continue to trace.
1423  */
tracing_snapshot_alloc(void)1424 void tracing_snapshot_alloc(void)
1425 {
1426 	int ret;
1427 
1428 	ret = tracing_alloc_snapshot();
1429 	if (ret < 0)
1430 		return;
1431 
1432 	tracing_snapshot();
1433 }
1434 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1435 
1436 /**
1437  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1438  * @tr:		The tracing instance
1439  * @cond_data:	User data to associate with the snapshot
1440  * @update:	Implementation of the cond_snapshot update function
1441  *
1442  * Check whether the conditional snapshot for the given instance has
1443  * already been enabled, or if the current tracer is already using a
1444  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1445  * save the cond_data and update function inside.
1446  *
1447  * Returns 0 if successful, error otherwise.
1448  */
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1449 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1450 				 cond_update_fn_t update)
1451 {
1452 	struct cond_snapshot *cond_snapshot __free(kfree) =
1453 		kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1454 	int ret;
1455 
1456 	if (!cond_snapshot)
1457 		return -ENOMEM;
1458 
1459 	cond_snapshot->cond_data = cond_data;
1460 	cond_snapshot->update = update;
1461 
1462 	guard(mutex)(&trace_types_lock);
1463 
1464 	if (tr->current_trace->use_max_tr)
1465 		return -EBUSY;
1466 
1467 	/*
1468 	 * The cond_snapshot can only change to NULL without the
1469 	 * trace_types_lock. We don't care if we race with it going
1470 	 * to NULL, but we want to make sure that it's not set to
1471 	 * something other than NULL when we get here, which we can
1472 	 * do safely with only holding the trace_types_lock and not
1473 	 * having to take the max_lock.
1474 	 */
1475 	if (tr->cond_snapshot)
1476 		return -EBUSY;
1477 
1478 	ret = tracing_arm_snapshot_locked(tr);
1479 	if (ret)
1480 		return ret;
1481 
1482 	local_irq_disable();
1483 	arch_spin_lock(&tr->max_lock);
1484 	tr->cond_snapshot = no_free_ptr(cond_snapshot);
1485 	arch_spin_unlock(&tr->max_lock);
1486 	local_irq_enable();
1487 
1488 	return 0;
1489 }
1490 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1491 
1492 /**
1493  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1494  * @tr:		The tracing instance
1495  *
1496  * Check whether the conditional snapshot for the given instance is
1497  * enabled; if so, free the cond_snapshot associated with it,
1498  * otherwise return -EINVAL.
1499  *
1500  * Returns 0 if successful, error otherwise.
1501  */
tracing_snapshot_cond_disable(struct trace_array * tr)1502 int tracing_snapshot_cond_disable(struct trace_array *tr)
1503 {
1504 	int ret = 0;
1505 
1506 	local_irq_disable();
1507 	arch_spin_lock(&tr->max_lock);
1508 
1509 	if (!tr->cond_snapshot)
1510 		ret = -EINVAL;
1511 	else {
1512 		kfree(tr->cond_snapshot);
1513 		tr->cond_snapshot = NULL;
1514 	}
1515 
1516 	arch_spin_unlock(&tr->max_lock);
1517 	local_irq_enable();
1518 
1519 	tracing_disarm_snapshot(tr);
1520 
1521 	return ret;
1522 }
1523 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1524 #else
tracing_snapshot(void)1525 void tracing_snapshot(void)
1526 {
1527 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1528 }
1529 EXPORT_SYMBOL_GPL(tracing_snapshot);
tracing_snapshot_cond(struct trace_array * tr,void * cond_data)1530 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1531 {
1532 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1533 }
1534 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
tracing_alloc_snapshot(void)1535 int tracing_alloc_snapshot(void)
1536 {
1537 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1538 	return -ENODEV;
1539 }
1540 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
tracing_snapshot_alloc(void)1541 void tracing_snapshot_alloc(void)
1542 {
1543 	/* Give warning */
1544 	tracing_snapshot();
1545 }
1546 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
tracing_cond_snapshot_data(struct trace_array * tr)1547 void *tracing_cond_snapshot_data(struct trace_array *tr)
1548 {
1549 	return NULL;
1550 }
1551 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
tracing_snapshot_cond_enable(struct trace_array * tr,void * cond_data,cond_update_fn_t update)1552 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1553 {
1554 	return -ENODEV;
1555 }
1556 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
tracing_snapshot_cond_disable(struct trace_array * tr)1557 int tracing_snapshot_cond_disable(struct trace_array *tr)
1558 {
1559 	return false;
1560 }
1561 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1562 #define free_snapshot(tr)	do { } while (0)
1563 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1564 #endif /* CONFIG_TRACER_SNAPSHOT */
1565 
tracer_tracing_off(struct trace_array * tr)1566 void tracer_tracing_off(struct trace_array *tr)
1567 {
1568 	if (tr->array_buffer.buffer)
1569 		ring_buffer_record_off(tr->array_buffer.buffer);
1570 	/*
1571 	 * This flag is looked at when buffers haven't been allocated
1572 	 * yet, or by some tracers (like irqsoff), that just want to
1573 	 * know if the ring buffer has been disabled, but it can handle
1574 	 * races of where it gets disabled but we still do a record.
1575 	 * As the check is in the fast path of the tracers, it is more
1576 	 * important to be fast than accurate.
1577 	 */
1578 	tr->buffer_disabled = 1;
1579 	/* Make the flag seen by readers */
1580 	smp_wmb();
1581 }
1582 
1583 /**
1584  * tracing_off - turn off tracing buffers
1585  *
1586  * This function stops the tracing buffers from recording data.
1587  * It does not disable any overhead the tracers themselves may
1588  * be causing. This function simply causes all recording to
1589  * the ring buffers to fail.
1590  */
tracing_off(void)1591 void tracing_off(void)
1592 {
1593 	tracer_tracing_off(&global_trace);
1594 }
1595 EXPORT_SYMBOL_GPL(tracing_off);
1596 
disable_trace_on_warning(void)1597 void disable_trace_on_warning(void)
1598 {
1599 	if (__disable_trace_on_warning) {
1600 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1601 			"Disabling tracing due to warning\n");
1602 		tracing_off();
1603 	}
1604 }
1605 
1606 /**
1607  * tracer_tracing_is_on - show real state of ring buffer enabled
1608  * @tr : the trace array to know if ring buffer is enabled
1609  *
1610  * Shows real state of the ring buffer if it is enabled or not.
1611  */
tracer_tracing_is_on(struct trace_array * tr)1612 bool tracer_tracing_is_on(struct trace_array *tr)
1613 {
1614 	if (tr->array_buffer.buffer)
1615 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1616 	return !tr->buffer_disabled;
1617 }
1618 
1619 /**
1620  * tracing_is_on - show state of ring buffers enabled
1621  */
tracing_is_on(void)1622 int tracing_is_on(void)
1623 {
1624 	return tracer_tracing_is_on(&global_trace);
1625 }
1626 EXPORT_SYMBOL_GPL(tracing_is_on);
1627 
set_buf_size(char * str)1628 static int __init set_buf_size(char *str)
1629 {
1630 	unsigned long buf_size;
1631 
1632 	if (!str)
1633 		return 0;
1634 	buf_size = memparse(str, &str);
1635 	/*
1636 	 * nr_entries can not be zero and the startup
1637 	 * tests require some buffer space. Therefore
1638 	 * ensure we have at least 4096 bytes of buffer.
1639 	 */
1640 	trace_buf_size = max(4096UL, buf_size);
1641 	return 1;
1642 }
1643 __setup("trace_buf_size=", set_buf_size);
1644 
set_tracing_thresh(char * str)1645 static int __init set_tracing_thresh(char *str)
1646 {
1647 	unsigned long threshold;
1648 	int ret;
1649 
1650 	if (!str)
1651 		return 0;
1652 	ret = kstrtoul(str, 0, &threshold);
1653 	if (ret < 0)
1654 		return 0;
1655 	tracing_thresh = threshold * 1000;
1656 	return 1;
1657 }
1658 __setup("tracing_thresh=", set_tracing_thresh);
1659 
nsecs_to_usecs(unsigned long nsecs)1660 unsigned long nsecs_to_usecs(unsigned long nsecs)
1661 {
1662 	return nsecs / 1000;
1663 }
1664 
1665 /*
1666  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1667  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1668  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1669  * of strings in the order that the evals (enum) were defined.
1670  */
1671 #undef C
1672 #define C(a, b) b
1673 
1674 /* These must match the bit positions in trace_iterator_flags */
1675 static const char *trace_options[] = {
1676 	TRACE_FLAGS
1677 	NULL
1678 };
1679 
1680 static struct {
1681 	u64 (*func)(void);
1682 	const char *name;
1683 	int in_ns;		/* is this clock in nanoseconds? */
1684 } trace_clocks[] = {
1685 	{ trace_clock_local,		"local",	1 },
1686 	{ trace_clock_global,		"global",	1 },
1687 	{ trace_clock_counter,		"counter",	0 },
1688 	{ trace_clock_jiffies,		"uptime",	0 },
1689 	{ trace_clock,			"perf",		1 },
1690 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1691 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1692 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1693 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1694 	ARCH_TRACE_CLOCKS
1695 };
1696 
trace_clock_in_ns(struct trace_array * tr)1697 bool trace_clock_in_ns(struct trace_array *tr)
1698 {
1699 	if (trace_clocks[tr->clock_id].in_ns)
1700 		return true;
1701 
1702 	return false;
1703 }
1704 
1705 /*
1706  * trace_parser_get_init - gets the buffer for trace parser
1707  */
trace_parser_get_init(struct trace_parser * parser,int size)1708 int trace_parser_get_init(struct trace_parser *parser, int size)
1709 {
1710 	memset(parser, 0, sizeof(*parser));
1711 
1712 	parser->buffer = kmalloc(size, GFP_KERNEL);
1713 	if (!parser->buffer)
1714 		return 1;
1715 
1716 	parser->size = size;
1717 	return 0;
1718 }
1719 
1720 /*
1721  * trace_parser_put - frees the buffer for trace parser
1722  */
trace_parser_put(struct trace_parser * parser)1723 void trace_parser_put(struct trace_parser *parser)
1724 {
1725 	kfree(parser->buffer);
1726 	parser->buffer = NULL;
1727 }
1728 
1729 /*
1730  * trace_get_user - reads the user input string separated by  space
1731  * (matched by isspace(ch))
1732  *
1733  * For each string found the 'struct trace_parser' is updated,
1734  * and the function returns.
1735  *
1736  * Returns number of bytes read.
1737  *
1738  * See kernel/trace/trace.h for 'struct trace_parser' details.
1739  */
trace_get_user(struct trace_parser * parser,const char __user * ubuf,size_t cnt,loff_t * ppos)1740 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1741 	size_t cnt, loff_t *ppos)
1742 {
1743 	char ch;
1744 	size_t read = 0;
1745 	ssize_t ret;
1746 
1747 	if (!*ppos)
1748 		trace_parser_clear(parser);
1749 
1750 	ret = get_user(ch, ubuf++);
1751 	if (ret)
1752 		goto out;
1753 
1754 	read++;
1755 	cnt--;
1756 
1757 	/*
1758 	 * The parser is not finished with the last write,
1759 	 * continue reading the user input without skipping spaces.
1760 	 */
1761 	if (!parser->cont) {
1762 		/* skip white space */
1763 		while (cnt && isspace(ch)) {
1764 			ret = get_user(ch, ubuf++);
1765 			if (ret)
1766 				goto out;
1767 			read++;
1768 			cnt--;
1769 		}
1770 
1771 		parser->idx = 0;
1772 
1773 		/* only spaces were written */
1774 		if (isspace(ch) || !ch) {
1775 			*ppos += read;
1776 			ret = read;
1777 			goto out;
1778 		}
1779 	}
1780 
1781 	/* read the non-space input */
1782 	while (cnt && !isspace(ch) && ch) {
1783 		if (parser->idx < parser->size - 1)
1784 			parser->buffer[parser->idx++] = ch;
1785 		else {
1786 			ret = -EINVAL;
1787 			goto out;
1788 		}
1789 		ret = get_user(ch, ubuf++);
1790 		if (ret)
1791 			goto out;
1792 		read++;
1793 		cnt--;
1794 	}
1795 
1796 	/* We either got finished input or we have to wait for another call. */
1797 	if (isspace(ch) || !ch) {
1798 		parser->buffer[parser->idx] = 0;
1799 		parser->cont = false;
1800 	} else if (parser->idx < parser->size - 1) {
1801 		parser->cont = true;
1802 		parser->buffer[parser->idx++] = ch;
1803 		/* Make sure the parsed string always terminates with '\0'. */
1804 		parser->buffer[parser->idx] = 0;
1805 	} else {
1806 		ret = -EINVAL;
1807 		goto out;
1808 	}
1809 
1810 	*ppos += read;
1811 	ret = read;
1812 
1813 out:
1814 	return ret;
1815 }
1816 
1817 /* TODO add a seq_buf_to_buffer() */
trace_seq_to_buffer(struct trace_seq * s,void * buf,size_t cnt)1818 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1819 {
1820 	int len;
1821 
1822 	if (trace_seq_used(s) <= s->readpos)
1823 		return -EBUSY;
1824 
1825 	len = trace_seq_used(s) - s->readpos;
1826 	if (cnt > len)
1827 		cnt = len;
1828 	memcpy(buf, s->buffer + s->readpos, cnt);
1829 
1830 	s->readpos += cnt;
1831 	return cnt;
1832 }
1833 
1834 unsigned long __read_mostly	tracing_thresh;
1835 
1836 #ifdef CONFIG_TRACER_MAX_TRACE
1837 static const struct file_operations tracing_max_lat_fops;
1838 
1839 #ifdef LATENCY_FS_NOTIFY
1840 
1841 static struct workqueue_struct *fsnotify_wq;
1842 
latency_fsnotify_workfn(struct work_struct * work)1843 static void latency_fsnotify_workfn(struct work_struct *work)
1844 {
1845 	struct trace_array *tr = container_of(work, struct trace_array,
1846 					      fsnotify_work);
1847 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1848 }
1849 
latency_fsnotify_workfn_irq(struct irq_work * iwork)1850 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1851 {
1852 	struct trace_array *tr = container_of(iwork, struct trace_array,
1853 					      fsnotify_irqwork);
1854 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1855 }
1856 
trace_create_maxlat_file(struct trace_array * tr,struct dentry * d_tracer)1857 static void trace_create_maxlat_file(struct trace_array *tr,
1858 				     struct dentry *d_tracer)
1859 {
1860 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1861 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1862 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1863 					      TRACE_MODE_WRITE,
1864 					      d_tracer, tr,
1865 					      &tracing_max_lat_fops);
1866 }
1867 
latency_fsnotify_init(void)1868 __init static int latency_fsnotify_init(void)
1869 {
1870 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1871 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1872 	if (!fsnotify_wq) {
1873 		pr_err("Unable to allocate tr_max_lat_wq\n");
1874 		return -ENOMEM;
1875 	}
1876 	return 0;
1877 }
1878 
1879 late_initcall_sync(latency_fsnotify_init);
1880 
latency_fsnotify(struct trace_array * tr)1881 void latency_fsnotify(struct trace_array *tr)
1882 {
1883 	if (!fsnotify_wq)
1884 		return;
1885 	/*
1886 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1887 	 * possible that we are called from __schedule() or do_idle(), which
1888 	 * could cause a deadlock.
1889 	 */
1890 	irq_work_queue(&tr->fsnotify_irqwork);
1891 }
1892 
1893 #else /* !LATENCY_FS_NOTIFY */
1894 
1895 #define trace_create_maxlat_file(tr, d_tracer)				\
1896 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1897 			  d_tracer, tr, &tracing_max_lat_fops)
1898 
1899 #endif
1900 
1901 /*
1902  * Copy the new maximum trace into the separate maximum-trace
1903  * structure. (this way the maximum trace is permanently saved,
1904  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1905  */
1906 static void
__update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu)1907 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1908 {
1909 	struct array_buffer *trace_buf = &tr->array_buffer;
1910 	struct array_buffer *max_buf = &tr->max_buffer;
1911 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1912 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1913 
1914 	max_buf->cpu = cpu;
1915 	max_buf->time_start = data->preempt_timestamp;
1916 
1917 	max_data->saved_latency = tr->max_latency;
1918 	max_data->critical_start = data->critical_start;
1919 	max_data->critical_end = data->critical_end;
1920 
1921 	strscpy(max_data->comm, tsk->comm);
1922 	max_data->pid = tsk->pid;
1923 	/*
1924 	 * If tsk == current, then use current_uid(), as that does not use
1925 	 * RCU. The irq tracer can be called out of RCU scope.
1926 	 */
1927 	if (tsk == current)
1928 		max_data->uid = current_uid();
1929 	else
1930 		max_data->uid = task_uid(tsk);
1931 
1932 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1933 	max_data->policy = tsk->policy;
1934 	max_data->rt_priority = tsk->rt_priority;
1935 
1936 	/* record this tasks comm */
1937 	tracing_record_cmdline(tsk);
1938 	latency_fsnotify(tr);
1939 }
1940 
1941 /**
1942  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1943  * @tr: tracer
1944  * @tsk: the task with the latency
1945  * @cpu: The cpu that initiated the trace.
1946  * @cond_data: User data associated with a conditional snapshot
1947  *
1948  * Flip the buffers between the @tr and the max_tr and record information
1949  * about which task was the cause of this latency.
1950  */
1951 void
update_max_tr(struct trace_array * tr,struct task_struct * tsk,int cpu,void * cond_data)1952 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1953 	      void *cond_data)
1954 {
1955 	if (tr->stop_count)
1956 		return;
1957 
1958 	WARN_ON_ONCE(!irqs_disabled());
1959 
1960 	if (!tr->allocated_snapshot) {
1961 		/* Only the nop tracer should hit this when disabling */
1962 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1963 		return;
1964 	}
1965 
1966 	arch_spin_lock(&tr->max_lock);
1967 
1968 	/* Inherit the recordable setting from array_buffer */
1969 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1970 		ring_buffer_record_on(tr->max_buffer.buffer);
1971 	else
1972 		ring_buffer_record_off(tr->max_buffer.buffer);
1973 
1974 #ifdef CONFIG_TRACER_SNAPSHOT
1975 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1976 		arch_spin_unlock(&tr->max_lock);
1977 		return;
1978 	}
1979 #endif
1980 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1981 
1982 	__update_max_tr(tr, tsk, cpu);
1983 
1984 	arch_spin_unlock(&tr->max_lock);
1985 
1986 	/* Any waiters on the old snapshot buffer need to wake up */
1987 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1988 }
1989 
1990 /**
1991  * update_max_tr_single - only copy one trace over, and reset the rest
1992  * @tr: tracer
1993  * @tsk: task with the latency
1994  * @cpu: the cpu of the buffer to copy.
1995  *
1996  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1997  */
1998 void
update_max_tr_single(struct trace_array * tr,struct task_struct * tsk,int cpu)1999 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2000 {
2001 	int ret;
2002 
2003 	if (tr->stop_count)
2004 		return;
2005 
2006 	WARN_ON_ONCE(!irqs_disabled());
2007 	if (!tr->allocated_snapshot) {
2008 		/* Only the nop tracer should hit this when disabling */
2009 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2010 		return;
2011 	}
2012 
2013 	arch_spin_lock(&tr->max_lock);
2014 
2015 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2016 
2017 	if (ret == -EBUSY) {
2018 		/*
2019 		 * We failed to swap the buffer due to a commit taking
2020 		 * place on this CPU. We fail to record, but we reset
2021 		 * the max trace buffer (no one writes directly to it)
2022 		 * and flag that it failed.
2023 		 * Another reason is resize is in progress.
2024 		 */
2025 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2026 			"Failed to swap buffers due to commit or resize in progress\n");
2027 	}
2028 
2029 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2030 
2031 	__update_max_tr(tr, tsk, cpu);
2032 	arch_spin_unlock(&tr->max_lock);
2033 }
2034 
2035 #endif /* CONFIG_TRACER_MAX_TRACE */
2036 
2037 struct pipe_wait {
2038 	struct trace_iterator		*iter;
2039 	int				wait_index;
2040 };
2041 
wait_pipe_cond(void * data)2042 static bool wait_pipe_cond(void *data)
2043 {
2044 	struct pipe_wait *pwait = data;
2045 	struct trace_iterator *iter = pwait->iter;
2046 
2047 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2048 		return true;
2049 
2050 	return iter->closed;
2051 }
2052 
wait_on_pipe(struct trace_iterator * iter,int full)2053 static int wait_on_pipe(struct trace_iterator *iter, int full)
2054 {
2055 	struct pipe_wait pwait;
2056 	int ret;
2057 
2058 	/* Iterators are static, they should be filled or empty */
2059 	if (trace_buffer_iter(iter, iter->cpu_file))
2060 		return 0;
2061 
2062 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2063 	pwait.iter = iter;
2064 
2065 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2066 			       wait_pipe_cond, &pwait);
2067 
2068 #ifdef CONFIG_TRACER_MAX_TRACE
2069 	/*
2070 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2071 	 * to happen, this would now be the main buffer.
2072 	 */
2073 	if (iter->snapshot)
2074 		iter->array_buffer = &iter->tr->max_buffer;
2075 #endif
2076 	return ret;
2077 }
2078 
2079 #ifdef CONFIG_FTRACE_STARTUP_TEST
2080 static bool selftests_can_run;
2081 
2082 struct trace_selftests {
2083 	struct list_head		list;
2084 	struct tracer			*type;
2085 };
2086 
2087 static LIST_HEAD(postponed_selftests);
2088 
save_selftest(struct tracer * type)2089 static int save_selftest(struct tracer *type)
2090 {
2091 	struct trace_selftests *selftest;
2092 
2093 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2094 	if (!selftest)
2095 		return -ENOMEM;
2096 
2097 	selftest->type = type;
2098 	list_add(&selftest->list, &postponed_selftests);
2099 	return 0;
2100 }
2101 
run_tracer_selftest(struct tracer * type)2102 static int run_tracer_selftest(struct tracer *type)
2103 {
2104 	struct trace_array *tr = &global_trace;
2105 	struct tracer *saved_tracer = tr->current_trace;
2106 	int ret;
2107 
2108 	if (!type->selftest || tracing_selftest_disabled)
2109 		return 0;
2110 
2111 	/*
2112 	 * If a tracer registers early in boot up (before scheduling is
2113 	 * initialized and such), then do not run its selftests yet.
2114 	 * Instead, run it a little later in the boot process.
2115 	 */
2116 	if (!selftests_can_run)
2117 		return save_selftest(type);
2118 
2119 	if (!tracing_is_on()) {
2120 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2121 			type->name);
2122 		return 0;
2123 	}
2124 
2125 	/*
2126 	 * Run a selftest on this tracer.
2127 	 * Here we reset the trace buffer, and set the current
2128 	 * tracer to be this tracer. The tracer can then run some
2129 	 * internal tracing to verify that everything is in order.
2130 	 * If we fail, we do not register this tracer.
2131 	 */
2132 	tracing_reset_online_cpus(&tr->array_buffer);
2133 
2134 	tr->current_trace = type;
2135 
2136 #ifdef CONFIG_TRACER_MAX_TRACE
2137 	if (type->use_max_tr) {
2138 		/* If we expanded the buffers, make sure the max is expanded too */
2139 		if (tr->ring_buffer_expanded)
2140 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2141 					   RING_BUFFER_ALL_CPUS);
2142 		tr->allocated_snapshot = true;
2143 	}
2144 #endif
2145 
2146 	/* the test is responsible for initializing and enabling */
2147 	pr_info("Testing tracer %s: ", type->name);
2148 	ret = type->selftest(type, tr);
2149 	/* the test is responsible for resetting too */
2150 	tr->current_trace = saved_tracer;
2151 	if (ret) {
2152 		printk(KERN_CONT "FAILED!\n");
2153 		/* Add the warning after printing 'FAILED' */
2154 		WARN_ON(1);
2155 		return -1;
2156 	}
2157 	/* Only reset on passing, to avoid touching corrupted buffers */
2158 	tracing_reset_online_cpus(&tr->array_buffer);
2159 
2160 #ifdef CONFIG_TRACER_MAX_TRACE
2161 	if (type->use_max_tr) {
2162 		tr->allocated_snapshot = false;
2163 
2164 		/* Shrink the max buffer again */
2165 		if (tr->ring_buffer_expanded)
2166 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2167 					   RING_BUFFER_ALL_CPUS);
2168 	}
2169 #endif
2170 
2171 	printk(KERN_CONT "PASSED\n");
2172 	return 0;
2173 }
2174 
do_run_tracer_selftest(struct tracer * type)2175 static int do_run_tracer_selftest(struct tracer *type)
2176 {
2177 	int ret;
2178 
2179 	/*
2180 	 * Tests can take a long time, especially if they are run one after the
2181 	 * other, as does happen during bootup when all the tracers are
2182 	 * registered. This could cause the soft lockup watchdog to trigger.
2183 	 */
2184 	cond_resched();
2185 
2186 	tracing_selftest_running = true;
2187 	ret = run_tracer_selftest(type);
2188 	tracing_selftest_running = false;
2189 
2190 	return ret;
2191 }
2192 
init_trace_selftests(void)2193 static __init int init_trace_selftests(void)
2194 {
2195 	struct trace_selftests *p, *n;
2196 	struct tracer *t, **last;
2197 	int ret;
2198 
2199 	selftests_can_run = true;
2200 
2201 	guard(mutex)(&trace_types_lock);
2202 
2203 	if (list_empty(&postponed_selftests))
2204 		return 0;
2205 
2206 	pr_info("Running postponed tracer tests:\n");
2207 
2208 	tracing_selftest_running = true;
2209 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2210 		/* This loop can take minutes when sanitizers are enabled, so
2211 		 * lets make sure we allow RCU processing.
2212 		 */
2213 		cond_resched();
2214 		ret = run_tracer_selftest(p->type);
2215 		/* If the test fails, then warn and remove from available_tracers */
2216 		if (ret < 0) {
2217 			WARN(1, "tracer: %s failed selftest, disabling\n",
2218 			     p->type->name);
2219 			last = &trace_types;
2220 			for (t = trace_types; t; t = t->next) {
2221 				if (t == p->type) {
2222 					*last = t->next;
2223 					break;
2224 				}
2225 				last = &t->next;
2226 			}
2227 		}
2228 		list_del(&p->list);
2229 		kfree(p);
2230 	}
2231 	tracing_selftest_running = false;
2232 
2233 	return 0;
2234 }
2235 core_initcall(init_trace_selftests);
2236 #else
do_run_tracer_selftest(struct tracer * type)2237 static inline int do_run_tracer_selftest(struct tracer *type)
2238 {
2239 	return 0;
2240 }
2241 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2242 
2243 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2244 
2245 static void __init apply_trace_boot_options(void);
2246 
2247 /**
2248  * register_tracer - register a tracer with the ftrace system.
2249  * @type: the plugin for the tracer
2250  *
2251  * Register a new plugin tracer.
2252  */
register_tracer(struct tracer * type)2253 int __init register_tracer(struct tracer *type)
2254 {
2255 	struct tracer *t;
2256 	int ret = 0;
2257 
2258 	if (!type->name) {
2259 		pr_info("Tracer must have a name\n");
2260 		return -1;
2261 	}
2262 
2263 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2264 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2265 		return -1;
2266 	}
2267 
2268 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2269 		pr_warn("Can not register tracer %s due to lockdown\n",
2270 			   type->name);
2271 		return -EPERM;
2272 	}
2273 
2274 	mutex_lock(&trace_types_lock);
2275 
2276 	for (t = trace_types; t; t = t->next) {
2277 		if (strcmp(type->name, t->name) == 0) {
2278 			/* already found */
2279 			pr_info("Tracer %s already registered\n",
2280 				type->name);
2281 			ret = -1;
2282 			goto out;
2283 		}
2284 	}
2285 
2286 	if (!type->set_flag)
2287 		type->set_flag = &dummy_set_flag;
2288 	if (!type->flags) {
2289 		/*allocate a dummy tracer_flags*/
2290 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2291 		if (!type->flags) {
2292 			ret = -ENOMEM;
2293 			goto out;
2294 		}
2295 		type->flags->val = 0;
2296 		type->flags->opts = dummy_tracer_opt;
2297 	} else
2298 		if (!type->flags->opts)
2299 			type->flags->opts = dummy_tracer_opt;
2300 
2301 	/* store the tracer for __set_tracer_option */
2302 	type->flags->trace = type;
2303 
2304 	ret = do_run_tracer_selftest(type);
2305 	if (ret < 0)
2306 		goto out;
2307 
2308 	type->next = trace_types;
2309 	trace_types = type;
2310 	add_tracer_options(&global_trace, type);
2311 
2312  out:
2313 	mutex_unlock(&trace_types_lock);
2314 
2315 	if (ret || !default_bootup_tracer)
2316 		goto out_unlock;
2317 
2318 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2319 		goto out_unlock;
2320 
2321 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2322 	/* Do we want this tracer to start on bootup? */
2323 	tracing_set_tracer(&global_trace, type->name);
2324 	default_bootup_tracer = NULL;
2325 
2326 	apply_trace_boot_options();
2327 
2328 	/* disable other selftests, since this will break it. */
2329 	disable_tracing_selftest("running a tracer");
2330 
2331  out_unlock:
2332 	return ret;
2333 }
2334 
tracing_reset_cpu(struct array_buffer * buf,int cpu)2335 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2336 {
2337 	struct trace_buffer *buffer = buf->buffer;
2338 
2339 	if (!buffer)
2340 		return;
2341 
2342 	ring_buffer_record_disable(buffer);
2343 
2344 	/* Make sure all commits have finished */
2345 	synchronize_rcu();
2346 	ring_buffer_reset_cpu(buffer, cpu);
2347 
2348 	ring_buffer_record_enable(buffer);
2349 }
2350 
tracing_reset_online_cpus(struct array_buffer * buf)2351 void tracing_reset_online_cpus(struct array_buffer *buf)
2352 {
2353 	struct trace_buffer *buffer = buf->buffer;
2354 
2355 	if (!buffer)
2356 		return;
2357 
2358 	ring_buffer_record_disable(buffer);
2359 
2360 	/* Make sure all commits have finished */
2361 	synchronize_rcu();
2362 
2363 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2364 
2365 	ring_buffer_reset_online_cpus(buffer);
2366 
2367 	ring_buffer_record_enable(buffer);
2368 }
2369 
tracing_reset_all_cpus(struct array_buffer * buf)2370 static void tracing_reset_all_cpus(struct array_buffer *buf)
2371 {
2372 	struct trace_buffer *buffer = buf->buffer;
2373 
2374 	if (!buffer)
2375 		return;
2376 
2377 	ring_buffer_record_disable(buffer);
2378 
2379 	/* Make sure all commits have finished */
2380 	synchronize_rcu();
2381 
2382 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2383 
2384 	ring_buffer_reset(buffer);
2385 
2386 	ring_buffer_record_enable(buffer);
2387 }
2388 
2389 /* Must have trace_types_lock held */
tracing_reset_all_online_cpus_unlocked(void)2390 void tracing_reset_all_online_cpus_unlocked(void)
2391 {
2392 	struct trace_array *tr;
2393 
2394 	lockdep_assert_held(&trace_types_lock);
2395 
2396 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2397 		if (!tr->clear_trace)
2398 			continue;
2399 		tr->clear_trace = false;
2400 		tracing_reset_online_cpus(&tr->array_buffer);
2401 #ifdef CONFIG_TRACER_MAX_TRACE
2402 		tracing_reset_online_cpus(&tr->max_buffer);
2403 #endif
2404 	}
2405 }
2406 
tracing_reset_all_online_cpus(void)2407 void tracing_reset_all_online_cpus(void)
2408 {
2409 	mutex_lock(&trace_types_lock);
2410 	tracing_reset_all_online_cpus_unlocked();
2411 	mutex_unlock(&trace_types_lock);
2412 }
2413 
is_tracing_stopped(void)2414 int is_tracing_stopped(void)
2415 {
2416 	return global_trace.stop_count;
2417 }
2418 
tracing_start_tr(struct trace_array * tr)2419 static void tracing_start_tr(struct trace_array *tr)
2420 {
2421 	struct trace_buffer *buffer;
2422 	unsigned long flags;
2423 
2424 	if (tracing_disabled)
2425 		return;
2426 
2427 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2428 	if (--tr->stop_count) {
2429 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2430 			/* Someone screwed up their debugging */
2431 			tr->stop_count = 0;
2432 		}
2433 		goto out;
2434 	}
2435 
2436 	/* Prevent the buffers from switching */
2437 	arch_spin_lock(&tr->max_lock);
2438 
2439 	buffer = tr->array_buffer.buffer;
2440 	if (buffer)
2441 		ring_buffer_record_enable(buffer);
2442 
2443 #ifdef CONFIG_TRACER_MAX_TRACE
2444 	buffer = tr->max_buffer.buffer;
2445 	if (buffer)
2446 		ring_buffer_record_enable(buffer);
2447 #endif
2448 
2449 	arch_spin_unlock(&tr->max_lock);
2450 
2451  out:
2452 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2453 }
2454 
2455 /**
2456  * tracing_start - quick start of the tracer
2457  *
2458  * If tracing is enabled but was stopped by tracing_stop,
2459  * this will start the tracer back up.
2460  */
tracing_start(void)2461 void tracing_start(void)
2462 
2463 {
2464 	return tracing_start_tr(&global_trace);
2465 }
2466 
tracing_stop_tr(struct trace_array * tr)2467 static void tracing_stop_tr(struct trace_array *tr)
2468 {
2469 	struct trace_buffer *buffer;
2470 	unsigned long flags;
2471 
2472 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2473 	if (tr->stop_count++)
2474 		goto out;
2475 
2476 	/* Prevent the buffers from switching */
2477 	arch_spin_lock(&tr->max_lock);
2478 
2479 	buffer = tr->array_buffer.buffer;
2480 	if (buffer)
2481 		ring_buffer_record_disable(buffer);
2482 
2483 #ifdef CONFIG_TRACER_MAX_TRACE
2484 	buffer = tr->max_buffer.buffer;
2485 	if (buffer)
2486 		ring_buffer_record_disable(buffer);
2487 #endif
2488 
2489 	arch_spin_unlock(&tr->max_lock);
2490 
2491  out:
2492 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2493 }
2494 
2495 /**
2496  * tracing_stop - quick stop of the tracer
2497  *
2498  * Light weight way to stop tracing. Use in conjunction with
2499  * tracing_start.
2500  */
tracing_stop(void)2501 void tracing_stop(void)
2502 {
2503 	return tracing_stop_tr(&global_trace);
2504 }
2505 
2506 /*
2507  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2508  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2509  * simplifies those functions and keeps them in sync.
2510  */
trace_handle_return(struct trace_seq * s)2511 enum print_line_t trace_handle_return(struct trace_seq *s)
2512 {
2513 	return trace_seq_has_overflowed(s) ?
2514 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2515 }
2516 EXPORT_SYMBOL_GPL(trace_handle_return);
2517 
migration_disable_value(void)2518 static unsigned short migration_disable_value(void)
2519 {
2520 #if defined(CONFIG_SMP)
2521 	return current->migration_disabled;
2522 #else
2523 	return 0;
2524 #endif
2525 }
2526 
tracing_gen_ctx_irq_test(unsigned int irqs_status)2527 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2528 {
2529 	unsigned int trace_flags = irqs_status;
2530 	unsigned int pc;
2531 
2532 	pc = preempt_count();
2533 
2534 	if (pc & NMI_MASK)
2535 		trace_flags |= TRACE_FLAG_NMI;
2536 	if (pc & HARDIRQ_MASK)
2537 		trace_flags |= TRACE_FLAG_HARDIRQ;
2538 	if (in_serving_softirq())
2539 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2540 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2541 		trace_flags |= TRACE_FLAG_BH_OFF;
2542 
2543 	if (tif_need_resched())
2544 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2545 	if (test_preempt_need_resched())
2546 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2547 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2548 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2549 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2550 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2551 }
2552 
2553 struct ring_buffer_event *
trace_buffer_lock_reserve(struct trace_buffer * buffer,int type,unsigned long len,unsigned int trace_ctx)2554 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2555 			  int type,
2556 			  unsigned long len,
2557 			  unsigned int trace_ctx)
2558 {
2559 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2560 }
2561 
2562 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2563 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2564 static int trace_buffered_event_ref;
2565 
2566 /**
2567  * trace_buffered_event_enable - enable buffering events
2568  *
2569  * When events are being filtered, it is quicker to use a temporary
2570  * buffer to write the event data into if there's a likely chance
2571  * that it will not be committed. The discard of the ring buffer
2572  * is not as fast as committing, and is much slower than copying
2573  * a commit.
2574  *
2575  * When an event is to be filtered, allocate per cpu buffers to
2576  * write the event data into, and if the event is filtered and discarded
2577  * it is simply dropped, otherwise, the entire data is to be committed
2578  * in one shot.
2579  */
trace_buffered_event_enable(void)2580 void trace_buffered_event_enable(void)
2581 {
2582 	struct ring_buffer_event *event;
2583 	struct page *page;
2584 	int cpu;
2585 
2586 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2587 
2588 	if (trace_buffered_event_ref++)
2589 		return;
2590 
2591 	for_each_tracing_cpu(cpu) {
2592 		page = alloc_pages_node(cpu_to_node(cpu),
2593 					GFP_KERNEL | __GFP_NORETRY, 0);
2594 		/* This is just an optimization and can handle failures */
2595 		if (!page) {
2596 			pr_err("Failed to allocate event buffer\n");
2597 			break;
2598 		}
2599 
2600 		event = page_address(page);
2601 		memset(event, 0, sizeof(*event));
2602 
2603 		per_cpu(trace_buffered_event, cpu) = event;
2604 
2605 		preempt_disable();
2606 		if (cpu == smp_processor_id() &&
2607 		    __this_cpu_read(trace_buffered_event) !=
2608 		    per_cpu(trace_buffered_event, cpu))
2609 			WARN_ON_ONCE(1);
2610 		preempt_enable();
2611 	}
2612 }
2613 
enable_trace_buffered_event(void * data)2614 static void enable_trace_buffered_event(void *data)
2615 {
2616 	/* Probably not needed, but do it anyway */
2617 	smp_rmb();
2618 	this_cpu_dec(trace_buffered_event_cnt);
2619 }
2620 
disable_trace_buffered_event(void * data)2621 static void disable_trace_buffered_event(void *data)
2622 {
2623 	this_cpu_inc(trace_buffered_event_cnt);
2624 }
2625 
2626 /**
2627  * trace_buffered_event_disable - disable buffering events
2628  *
2629  * When a filter is removed, it is faster to not use the buffered
2630  * events, and to commit directly into the ring buffer. Free up
2631  * the temp buffers when there are no more users. This requires
2632  * special synchronization with current events.
2633  */
trace_buffered_event_disable(void)2634 void trace_buffered_event_disable(void)
2635 {
2636 	int cpu;
2637 
2638 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2639 
2640 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2641 		return;
2642 
2643 	if (--trace_buffered_event_ref)
2644 		return;
2645 
2646 	/* For each CPU, set the buffer as used. */
2647 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2648 			 NULL, true);
2649 
2650 	/* Wait for all current users to finish */
2651 	synchronize_rcu();
2652 
2653 	for_each_tracing_cpu(cpu) {
2654 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2655 		per_cpu(trace_buffered_event, cpu) = NULL;
2656 	}
2657 
2658 	/*
2659 	 * Wait for all CPUs that potentially started checking if they can use
2660 	 * their event buffer only after the previous synchronize_rcu() call and
2661 	 * they still read a valid pointer from trace_buffered_event. It must be
2662 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2663 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2664 	 */
2665 	synchronize_rcu();
2666 
2667 	/* For each CPU, relinquish the buffer */
2668 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2669 			 true);
2670 }
2671 
2672 static struct trace_buffer *temp_buffer;
2673 
2674 struct ring_buffer_event *
trace_event_buffer_lock_reserve(struct trace_buffer ** current_rb,struct trace_event_file * trace_file,int type,unsigned long len,unsigned int trace_ctx)2675 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2676 			  struct trace_event_file *trace_file,
2677 			  int type, unsigned long len,
2678 			  unsigned int trace_ctx)
2679 {
2680 	struct ring_buffer_event *entry;
2681 	struct trace_array *tr = trace_file->tr;
2682 	int val;
2683 
2684 	*current_rb = tr->array_buffer.buffer;
2685 
2686 	if (!tr->no_filter_buffering_ref &&
2687 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2688 		preempt_disable_notrace();
2689 		/*
2690 		 * Filtering is on, so try to use the per cpu buffer first.
2691 		 * This buffer will simulate a ring_buffer_event,
2692 		 * where the type_len is zero and the array[0] will
2693 		 * hold the full length.
2694 		 * (see include/linux/ring-buffer.h for details on
2695 		 *  how the ring_buffer_event is structured).
2696 		 *
2697 		 * Using a temp buffer during filtering and copying it
2698 		 * on a matched filter is quicker than writing directly
2699 		 * into the ring buffer and then discarding it when
2700 		 * it doesn't match. That is because the discard
2701 		 * requires several atomic operations to get right.
2702 		 * Copying on match and doing nothing on a failed match
2703 		 * is still quicker than no copy on match, but having
2704 		 * to discard out of the ring buffer on a failed match.
2705 		 */
2706 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2707 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2708 
2709 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2710 
2711 			/*
2712 			 * Preemption is disabled, but interrupts and NMIs
2713 			 * can still come in now. If that happens after
2714 			 * the above increment, then it will have to go
2715 			 * back to the old method of allocating the event
2716 			 * on the ring buffer, and if the filter fails, it
2717 			 * will have to call ring_buffer_discard_commit()
2718 			 * to remove it.
2719 			 *
2720 			 * Need to also check the unlikely case that the
2721 			 * length is bigger than the temp buffer size.
2722 			 * If that happens, then the reserve is pretty much
2723 			 * guaranteed to fail, as the ring buffer currently
2724 			 * only allows events less than a page. But that may
2725 			 * change in the future, so let the ring buffer reserve
2726 			 * handle the failure in that case.
2727 			 */
2728 			if (val == 1 && likely(len <= max_len)) {
2729 				trace_event_setup(entry, type, trace_ctx);
2730 				entry->array[0] = len;
2731 				/* Return with preemption disabled */
2732 				return entry;
2733 			}
2734 			this_cpu_dec(trace_buffered_event_cnt);
2735 		}
2736 		/* __trace_buffer_lock_reserve() disables preemption */
2737 		preempt_enable_notrace();
2738 	}
2739 
2740 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2741 					    trace_ctx);
2742 	/*
2743 	 * If tracing is off, but we have triggers enabled
2744 	 * we still need to look at the event data. Use the temp_buffer
2745 	 * to store the trace event for the trigger to use. It's recursive
2746 	 * safe and will not be recorded anywhere.
2747 	 */
2748 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2749 		*current_rb = temp_buffer;
2750 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2751 						    trace_ctx);
2752 	}
2753 	return entry;
2754 }
2755 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2756 
2757 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2758 static DEFINE_MUTEX(tracepoint_printk_mutex);
2759 
output_printk(struct trace_event_buffer * fbuffer)2760 static void output_printk(struct trace_event_buffer *fbuffer)
2761 {
2762 	struct trace_event_call *event_call;
2763 	struct trace_event_file *file;
2764 	struct trace_event *event;
2765 	unsigned long flags;
2766 	struct trace_iterator *iter = tracepoint_print_iter;
2767 
2768 	/* We should never get here if iter is NULL */
2769 	if (WARN_ON_ONCE(!iter))
2770 		return;
2771 
2772 	event_call = fbuffer->trace_file->event_call;
2773 	if (!event_call || !event_call->event.funcs ||
2774 	    !event_call->event.funcs->trace)
2775 		return;
2776 
2777 	file = fbuffer->trace_file;
2778 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2779 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2780 	     !filter_match_preds(file->filter, fbuffer->entry)))
2781 		return;
2782 
2783 	event = &fbuffer->trace_file->event_call->event;
2784 
2785 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2786 	trace_seq_init(&iter->seq);
2787 	iter->ent = fbuffer->entry;
2788 	event_call->event.funcs->trace(iter, 0, event);
2789 	trace_seq_putc(&iter->seq, 0);
2790 	printk("%s", iter->seq.buffer);
2791 
2792 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2793 }
2794 
tracepoint_printk_sysctl(const struct ctl_table * table,int write,void * buffer,size_t * lenp,loff_t * ppos)2795 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2796 			     void *buffer, size_t *lenp,
2797 			     loff_t *ppos)
2798 {
2799 	int save_tracepoint_printk;
2800 	int ret;
2801 
2802 	guard(mutex)(&tracepoint_printk_mutex);
2803 	save_tracepoint_printk = tracepoint_printk;
2804 
2805 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2806 
2807 	/*
2808 	 * This will force exiting early, as tracepoint_printk
2809 	 * is always zero when tracepoint_printk_iter is not allocated
2810 	 */
2811 	if (!tracepoint_print_iter)
2812 		tracepoint_printk = 0;
2813 
2814 	if (save_tracepoint_printk == tracepoint_printk)
2815 		return ret;
2816 
2817 	if (tracepoint_printk)
2818 		static_key_enable(&tracepoint_printk_key.key);
2819 	else
2820 		static_key_disable(&tracepoint_printk_key.key);
2821 
2822 	return ret;
2823 }
2824 
trace_event_buffer_commit(struct trace_event_buffer * fbuffer)2825 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2826 {
2827 	enum event_trigger_type tt = ETT_NONE;
2828 	struct trace_event_file *file = fbuffer->trace_file;
2829 
2830 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2831 			fbuffer->entry, &tt))
2832 		goto discard;
2833 
2834 	if (static_key_false(&tracepoint_printk_key.key))
2835 		output_printk(fbuffer);
2836 
2837 	if (static_branch_unlikely(&trace_event_exports_enabled))
2838 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2839 
2840 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2841 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2842 
2843 discard:
2844 	if (tt)
2845 		event_triggers_post_call(file, tt);
2846 
2847 }
2848 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2849 
2850 /*
2851  * Skip 3:
2852  *
2853  *   trace_buffer_unlock_commit_regs()
2854  *   trace_event_buffer_commit()
2855  *   trace_event_raw_event_xxx()
2856  */
2857 # define STACK_SKIP 3
2858 
trace_buffer_unlock_commit_regs(struct trace_array * tr,struct trace_buffer * buffer,struct ring_buffer_event * event,unsigned int trace_ctx,struct pt_regs * regs)2859 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2860 				     struct trace_buffer *buffer,
2861 				     struct ring_buffer_event *event,
2862 				     unsigned int trace_ctx,
2863 				     struct pt_regs *regs)
2864 {
2865 	__buffer_unlock_commit(buffer, event);
2866 
2867 	/*
2868 	 * If regs is not set, then skip the necessary functions.
2869 	 * Note, we can still get here via blktrace, wakeup tracer
2870 	 * and mmiotrace, but that's ok if they lose a function or
2871 	 * two. They are not that meaningful.
2872 	 */
2873 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2874 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2875 }
2876 
2877 /*
2878  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2879  */
2880 void
trace_buffer_unlock_commit_nostack(struct trace_buffer * buffer,struct ring_buffer_event * event)2881 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2882 				   struct ring_buffer_event *event)
2883 {
2884 	__buffer_unlock_commit(buffer, event);
2885 }
2886 
2887 void
trace_function(struct trace_array * tr,unsigned long ip,unsigned long parent_ip,unsigned int trace_ctx,struct ftrace_regs * fregs)2888 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2889 	       parent_ip, unsigned int trace_ctx, struct ftrace_regs *fregs)
2890 {
2891 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2892 	struct ring_buffer_event *event;
2893 	struct ftrace_entry *entry;
2894 	int size = sizeof(*entry);
2895 
2896 	size += FTRACE_REGS_MAX_ARGS * !!fregs * sizeof(long);
2897 
2898 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, size,
2899 					    trace_ctx);
2900 	if (!event)
2901 		return;
2902 	entry	= ring_buffer_event_data(event);
2903 	entry->ip			= ip;
2904 	entry->parent_ip		= parent_ip;
2905 
2906 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
2907 	if (fregs) {
2908 		for (int i = 0; i < FTRACE_REGS_MAX_ARGS; i++)
2909 			entry->args[i] = ftrace_regs_get_argument(fregs, i);
2910 	}
2911 #endif
2912 
2913 	if (static_branch_unlikely(&trace_function_exports_enabled))
2914 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2915 	__buffer_unlock_commit(buffer, event);
2916 }
2917 
2918 #ifdef CONFIG_STACKTRACE
2919 
2920 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2921 #define FTRACE_KSTACK_NESTING	4
2922 
2923 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2924 
2925 struct ftrace_stack {
2926 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2927 };
2928 
2929 
2930 struct ftrace_stacks {
2931 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2932 };
2933 
2934 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2935 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2936 
__ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)2937 static void __ftrace_trace_stack(struct trace_array *tr,
2938 				 struct trace_buffer *buffer,
2939 				 unsigned int trace_ctx,
2940 				 int skip, struct pt_regs *regs)
2941 {
2942 	struct ring_buffer_event *event;
2943 	unsigned int size, nr_entries;
2944 	struct ftrace_stack *fstack;
2945 	struct stack_entry *entry;
2946 	int stackidx;
2947 
2948 	/*
2949 	 * Add one, for this function and the call to save_stack_trace()
2950 	 * If regs is set, then these functions will not be in the way.
2951 	 */
2952 #ifndef CONFIG_UNWINDER_ORC
2953 	if (!regs)
2954 		skip++;
2955 #endif
2956 
2957 	preempt_disable_notrace();
2958 
2959 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2960 
2961 	/* This should never happen. If it does, yell once and skip */
2962 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2963 		goto out;
2964 
2965 	/*
2966 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2967 	 * interrupt will either see the value pre increment or post
2968 	 * increment. If the interrupt happens pre increment it will have
2969 	 * restored the counter when it returns.  We just need a barrier to
2970 	 * keep gcc from moving things around.
2971 	 */
2972 	barrier();
2973 
2974 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2975 	size = ARRAY_SIZE(fstack->calls);
2976 
2977 	if (regs) {
2978 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2979 						   size, skip);
2980 	} else {
2981 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2982 	}
2983 
2984 #ifdef CONFIG_DYNAMIC_FTRACE
2985 	/* Mark entry of stack trace as trampoline code */
2986 	if (tr->ops && tr->ops->trampoline) {
2987 		unsigned long tramp_start = tr->ops->trampoline;
2988 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
2989 		unsigned long *calls = fstack->calls;
2990 
2991 		for (int i = 0; i < nr_entries; i++) {
2992 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
2993 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
2994 		}
2995 	}
2996 #endif
2997 
2998 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
2999 				    struct_size(entry, caller, nr_entries),
3000 				    trace_ctx);
3001 	if (!event)
3002 		goto out;
3003 	entry = ring_buffer_event_data(event);
3004 
3005 	entry->size = nr_entries;
3006 	memcpy(&entry->caller, fstack->calls,
3007 	       flex_array_size(entry, caller, nr_entries));
3008 
3009 	__buffer_unlock_commit(buffer, event);
3010 
3011  out:
3012 	/* Again, don't let gcc optimize things here */
3013 	barrier();
3014 	__this_cpu_dec(ftrace_stack_reserve);
3015 	preempt_enable_notrace();
3016 
3017 }
3018 
ftrace_trace_stack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx,int skip,struct pt_regs * regs)3019 static inline void ftrace_trace_stack(struct trace_array *tr,
3020 				      struct trace_buffer *buffer,
3021 				      unsigned int trace_ctx,
3022 				      int skip, struct pt_regs *regs)
3023 {
3024 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3025 		return;
3026 
3027 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3028 }
3029 
__trace_stack(struct trace_array * tr,unsigned int trace_ctx,int skip)3030 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3031 		   int skip)
3032 {
3033 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3034 
3035 	if (rcu_is_watching()) {
3036 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3037 		return;
3038 	}
3039 
3040 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3041 		return;
3042 
3043 	/*
3044 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3045 	 * but if the above rcu_is_watching() failed, then the NMI
3046 	 * triggered someplace critical, and ct_irq_enter() should
3047 	 * not be called from NMI.
3048 	 */
3049 	if (unlikely(in_nmi()))
3050 		return;
3051 
3052 	ct_irq_enter_irqson();
3053 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3054 	ct_irq_exit_irqson();
3055 }
3056 
3057 /**
3058  * trace_dump_stack - record a stack back trace in the trace buffer
3059  * @skip: Number of functions to skip (helper handlers)
3060  */
trace_dump_stack(int skip)3061 void trace_dump_stack(int skip)
3062 {
3063 	if (tracing_disabled || tracing_selftest_running)
3064 		return;
3065 
3066 #ifndef CONFIG_UNWINDER_ORC
3067 	/* Skip 1 to skip this function. */
3068 	skip++;
3069 #endif
3070 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3071 				tracing_gen_ctx(), skip, NULL);
3072 }
3073 EXPORT_SYMBOL_GPL(trace_dump_stack);
3074 
3075 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3076 static DEFINE_PER_CPU(int, user_stack_count);
3077 
3078 static void
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3079 ftrace_trace_userstack(struct trace_array *tr,
3080 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3081 {
3082 	struct ring_buffer_event *event;
3083 	struct userstack_entry *entry;
3084 
3085 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3086 		return;
3087 
3088 	/*
3089 	 * NMIs can not handle page faults, even with fix ups.
3090 	 * The save user stack can (and often does) fault.
3091 	 */
3092 	if (unlikely(in_nmi()))
3093 		return;
3094 
3095 	/*
3096 	 * prevent recursion, since the user stack tracing may
3097 	 * trigger other kernel events.
3098 	 */
3099 	preempt_disable();
3100 	if (__this_cpu_read(user_stack_count))
3101 		goto out;
3102 
3103 	__this_cpu_inc(user_stack_count);
3104 
3105 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3106 					    sizeof(*entry), trace_ctx);
3107 	if (!event)
3108 		goto out_drop_count;
3109 	entry	= ring_buffer_event_data(event);
3110 
3111 	entry->tgid		= current->tgid;
3112 	memset(&entry->caller, 0, sizeof(entry->caller));
3113 
3114 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3115 	__buffer_unlock_commit(buffer, event);
3116 
3117  out_drop_count:
3118 	__this_cpu_dec(user_stack_count);
3119  out:
3120 	preempt_enable();
3121 }
3122 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
ftrace_trace_userstack(struct trace_array * tr,struct trace_buffer * buffer,unsigned int trace_ctx)3123 static void ftrace_trace_userstack(struct trace_array *tr,
3124 				   struct trace_buffer *buffer,
3125 				   unsigned int trace_ctx)
3126 {
3127 }
3128 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3129 
3130 #endif /* CONFIG_STACKTRACE */
3131 
3132 static inline void
func_repeats_set_delta_ts(struct func_repeats_entry * entry,unsigned long long delta)3133 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3134 			  unsigned long long delta)
3135 {
3136 	entry->bottom_delta_ts = delta & U32_MAX;
3137 	entry->top_delta_ts = (delta >> 32);
3138 }
3139 
trace_last_func_repeats(struct trace_array * tr,struct trace_func_repeats * last_info,unsigned int trace_ctx)3140 void trace_last_func_repeats(struct trace_array *tr,
3141 			     struct trace_func_repeats *last_info,
3142 			     unsigned int trace_ctx)
3143 {
3144 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3145 	struct func_repeats_entry *entry;
3146 	struct ring_buffer_event *event;
3147 	u64 delta;
3148 
3149 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3150 					    sizeof(*entry), trace_ctx);
3151 	if (!event)
3152 		return;
3153 
3154 	delta = ring_buffer_event_time_stamp(buffer, event) -
3155 		last_info->ts_last_call;
3156 
3157 	entry = ring_buffer_event_data(event);
3158 	entry->ip = last_info->ip;
3159 	entry->parent_ip = last_info->parent_ip;
3160 	entry->count = last_info->count;
3161 	func_repeats_set_delta_ts(entry, delta);
3162 
3163 	__buffer_unlock_commit(buffer, event);
3164 }
3165 
3166 /* created for use with alloc_percpu */
3167 struct trace_buffer_struct {
3168 	int nesting;
3169 	char buffer[4][TRACE_BUF_SIZE];
3170 };
3171 
3172 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3173 
3174 /*
3175  * This allows for lockless recording.  If we're nested too deeply, then
3176  * this returns NULL.
3177  */
get_trace_buf(void)3178 static char *get_trace_buf(void)
3179 {
3180 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3181 
3182 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3183 		return NULL;
3184 
3185 	buffer->nesting++;
3186 
3187 	/* Interrupts must see nesting incremented before we use the buffer */
3188 	barrier();
3189 	return &buffer->buffer[buffer->nesting - 1][0];
3190 }
3191 
put_trace_buf(void)3192 static void put_trace_buf(void)
3193 {
3194 	/* Don't let the decrement of nesting leak before this */
3195 	barrier();
3196 	this_cpu_dec(trace_percpu_buffer->nesting);
3197 }
3198 
alloc_percpu_trace_buffer(void)3199 static int alloc_percpu_trace_buffer(void)
3200 {
3201 	struct trace_buffer_struct __percpu *buffers;
3202 
3203 	if (trace_percpu_buffer)
3204 		return 0;
3205 
3206 	buffers = alloc_percpu(struct trace_buffer_struct);
3207 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3208 		return -ENOMEM;
3209 
3210 	trace_percpu_buffer = buffers;
3211 	return 0;
3212 }
3213 
3214 static int buffers_allocated;
3215 
trace_printk_init_buffers(void)3216 void trace_printk_init_buffers(void)
3217 {
3218 	if (buffers_allocated)
3219 		return;
3220 
3221 	if (alloc_percpu_trace_buffer())
3222 		return;
3223 
3224 	/* trace_printk() is for debug use only. Don't use it in production. */
3225 
3226 	pr_warn("\n");
3227 	pr_warn("**********************************************************\n");
3228 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3229 	pr_warn("**                                                      **\n");
3230 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3231 	pr_warn("**                                                      **\n");
3232 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3233 	pr_warn("** unsafe for production use.                           **\n");
3234 	pr_warn("**                                                      **\n");
3235 	pr_warn("** If you see this message and you are not debugging    **\n");
3236 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3237 	pr_warn("**                                                      **\n");
3238 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3239 	pr_warn("**********************************************************\n");
3240 
3241 	/* Expand the buffers to set size */
3242 	tracing_update_buffers(&global_trace);
3243 
3244 	buffers_allocated = 1;
3245 
3246 	/*
3247 	 * trace_printk_init_buffers() can be called by modules.
3248 	 * If that happens, then we need to start cmdline recording
3249 	 * directly here. If the global_trace.buffer is already
3250 	 * allocated here, then this was called by module code.
3251 	 */
3252 	if (global_trace.array_buffer.buffer)
3253 		tracing_start_cmdline_record();
3254 }
3255 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3256 
trace_printk_start_comm(void)3257 void trace_printk_start_comm(void)
3258 {
3259 	/* Start tracing comms if trace printk is set */
3260 	if (!buffers_allocated)
3261 		return;
3262 	tracing_start_cmdline_record();
3263 }
3264 
trace_printk_start_stop_comm(int enabled)3265 static void trace_printk_start_stop_comm(int enabled)
3266 {
3267 	if (!buffers_allocated)
3268 		return;
3269 
3270 	if (enabled)
3271 		tracing_start_cmdline_record();
3272 	else
3273 		tracing_stop_cmdline_record();
3274 }
3275 
3276 /**
3277  * trace_vbprintk - write binary msg to tracing buffer
3278  * @ip:    The address of the caller
3279  * @fmt:   The string format to write to the buffer
3280  * @args:  Arguments for @fmt
3281  */
trace_vbprintk(unsigned long ip,const char * fmt,va_list args)3282 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3283 {
3284 	struct ring_buffer_event *event;
3285 	struct trace_buffer *buffer;
3286 	struct trace_array *tr = READ_ONCE(printk_trace);
3287 	struct bprint_entry *entry;
3288 	unsigned int trace_ctx;
3289 	char *tbuffer;
3290 	int len = 0, size;
3291 
3292 	if (!printk_binsafe(tr))
3293 		return trace_vprintk(ip, fmt, args);
3294 
3295 	if (unlikely(tracing_selftest_running || tracing_disabled))
3296 		return 0;
3297 
3298 	/* Don't pollute graph traces with trace_vprintk internals */
3299 	pause_graph_tracing();
3300 
3301 	trace_ctx = tracing_gen_ctx();
3302 	preempt_disable_notrace();
3303 
3304 	tbuffer = get_trace_buf();
3305 	if (!tbuffer) {
3306 		len = 0;
3307 		goto out_nobuffer;
3308 	}
3309 
3310 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3311 
3312 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3313 		goto out_put;
3314 
3315 	size = sizeof(*entry) + sizeof(u32) * len;
3316 	buffer = tr->array_buffer.buffer;
3317 	ring_buffer_nest_start(buffer);
3318 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3319 					    trace_ctx);
3320 	if (!event)
3321 		goto out;
3322 	entry = ring_buffer_event_data(event);
3323 	entry->ip			= ip;
3324 	entry->fmt			= fmt;
3325 
3326 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3327 	__buffer_unlock_commit(buffer, event);
3328 	ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3329 
3330 out:
3331 	ring_buffer_nest_end(buffer);
3332 out_put:
3333 	put_trace_buf();
3334 
3335 out_nobuffer:
3336 	preempt_enable_notrace();
3337 	unpause_graph_tracing();
3338 
3339 	return len;
3340 }
3341 EXPORT_SYMBOL_GPL(trace_vbprintk);
3342 
3343 __printf(3, 0)
3344 static int
__trace_array_vprintk(struct trace_buffer * buffer,unsigned long ip,const char * fmt,va_list args)3345 __trace_array_vprintk(struct trace_buffer *buffer,
3346 		      unsigned long ip, const char *fmt, va_list args)
3347 {
3348 	struct ring_buffer_event *event;
3349 	int len = 0, size;
3350 	struct print_entry *entry;
3351 	unsigned int trace_ctx;
3352 	char *tbuffer;
3353 
3354 	if (tracing_disabled)
3355 		return 0;
3356 
3357 	/* Don't pollute graph traces with trace_vprintk internals */
3358 	pause_graph_tracing();
3359 
3360 	trace_ctx = tracing_gen_ctx();
3361 	preempt_disable_notrace();
3362 
3363 
3364 	tbuffer = get_trace_buf();
3365 	if (!tbuffer) {
3366 		len = 0;
3367 		goto out_nobuffer;
3368 	}
3369 
3370 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3371 
3372 	size = sizeof(*entry) + len + 1;
3373 	ring_buffer_nest_start(buffer);
3374 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3375 					    trace_ctx);
3376 	if (!event)
3377 		goto out;
3378 	entry = ring_buffer_event_data(event);
3379 	entry->ip = ip;
3380 
3381 	memcpy(&entry->buf, tbuffer, len + 1);
3382 	__buffer_unlock_commit(buffer, event);
3383 	ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3384 
3385 out:
3386 	ring_buffer_nest_end(buffer);
3387 	put_trace_buf();
3388 
3389 out_nobuffer:
3390 	preempt_enable_notrace();
3391 	unpause_graph_tracing();
3392 
3393 	return len;
3394 }
3395 
3396 __printf(3, 0)
trace_array_vprintk(struct trace_array * tr,unsigned long ip,const char * fmt,va_list args)3397 int trace_array_vprintk(struct trace_array *tr,
3398 			unsigned long ip, const char *fmt, va_list args)
3399 {
3400 	if (tracing_selftest_running && tr == &global_trace)
3401 		return 0;
3402 
3403 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3404 }
3405 
3406 /**
3407  * trace_array_printk - Print a message to a specific instance
3408  * @tr: The instance trace_array descriptor
3409  * @ip: The instruction pointer that this is called from.
3410  * @fmt: The format to print (printf format)
3411  *
3412  * If a subsystem sets up its own instance, they have the right to
3413  * printk strings into their tracing instance buffer using this
3414  * function. Note, this function will not write into the top level
3415  * buffer (use trace_printk() for that), as writing into the top level
3416  * buffer should only have events that can be individually disabled.
3417  * trace_printk() is only used for debugging a kernel, and should not
3418  * be ever incorporated in normal use.
3419  *
3420  * trace_array_printk() can be used, as it will not add noise to the
3421  * top level tracing buffer.
3422  *
3423  * Note, trace_array_init_printk() must be called on @tr before this
3424  * can be used.
3425  */
3426 __printf(3, 0)
trace_array_printk(struct trace_array * tr,unsigned long ip,const char * fmt,...)3427 int trace_array_printk(struct trace_array *tr,
3428 		       unsigned long ip, const char *fmt, ...)
3429 {
3430 	int ret;
3431 	va_list ap;
3432 
3433 	if (!tr)
3434 		return -ENOENT;
3435 
3436 	/* This is only allowed for created instances */
3437 	if (tr == &global_trace)
3438 		return 0;
3439 
3440 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3441 		return 0;
3442 
3443 	va_start(ap, fmt);
3444 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3445 	va_end(ap);
3446 	return ret;
3447 }
3448 EXPORT_SYMBOL_GPL(trace_array_printk);
3449 
3450 /**
3451  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3452  * @tr: The trace array to initialize the buffers for
3453  *
3454  * As trace_array_printk() only writes into instances, they are OK to
3455  * have in the kernel (unlike trace_printk()). This needs to be called
3456  * before trace_array_printk() can be used on a trace_array.
3457  */
trace_array_init_printk(struct trace_array * tr)3458 int trace_array_init_printk(struct trace_array *tr)
3459 {
3460 	if (!tr)
3461 		return -ENOENT;
3462 
3463 	/* This is only allowed for created instances */
3464 	if (tr == &global_trace)
3465 		return -EINVAL;
3466 
3467 	return alloc_percpu_trace_buffer();
3468 }
3469 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3470 
3471 __printf(3, 4)
trace_array_printk_buf(struct trace_buffer * buffer,unsigned long ip,const char * fmt,...)3472 int trace_array_printk_buf(struct trace_buffer *buffer,
3473 			   unsigned long ip, const char *fmt, ...)
3474 {
3475 	int ret;
3476 	va_list ap;
3477 
3478 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3479 		return 0;
3480 
3481 	va_start(ap, fmt);
3482 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3483 	va_end(ap);
3484 	return ret;
3485 }
3486 
3487 __printf(2, 0)
trace_vprintk(unsigned long ip,const char * fmt,va_list args)3488 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3489 {
3490 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3491 }
3492 EXPORT_SYMBOL_GPL(trace_vprintk);
3493 
trace_iterator_increment(struct trace_iterator * iter)3494 static void trace_iterator_increment(struct trace_iterator *iter)
3495 {
3496 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3497 
3498 	iter->idx++;
3499 	if (buf_iter)
3500 		ring_buffer_iter_advance(buf_iter);
3501 }
3502 
3503 static struct trace_entry *
peek_next_entry(struct trace_iterator * iter,int cpu,u64 * ts,unsigned long * lost_events)3504 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3505 		unsigned long *lost_events)
3506 {
3507 	struct ring_buffer_event *event;
3508 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3509 
3510 	if (buf_iter) {
3511 		event = ring_buffer_iter_peek(buf_iter, ts);
3512 		if (lost_events)
3513 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3514 				(unsigned long)-1 : 0;
3515 	} else {
3516 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3517 					 lost_events);
3518 	}
3519 
3520 	if (event) {
3521 		iter->ent_size = ring_buffer_event_length(event);
3522 		return ring_buffer_event_data(event);
3523 	}
3524 	iter->ent_size = 0;
3525 	return NULL;
3526 }
3527 
3528 static struct trace_entry *
__find_next_entry(struct trace_iterator * iter,int * ent_cpu,unsigned long * missing_events,u64 * ent_ts)3529 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3530 		  unsigned long *missing_events, u64 *ent_ts)
3531 {
3532 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3533 	struct trace_entry *ent, *next = NULL;
3534 	unsigned long lost_events = 0, next_lost = 0;
3535 	int cpu_file = iter->cpu_file;
3536 	u64 next_ts = 0, ts;
3537 	int next_cpu = -1;
3538 	int next_size = 0;
3539 	int cpu;
3540 
3541 	/*
3542 	 * If we are in a per_cpu trace file, don't bother by iterating over
3543 	 * all cpu and peek directly.
3544 	 */
3545 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3546 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3547 			return NULL;
3548 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3549 		if (ent_cpu)
3550 			*ent_cpu = cpu_file;
3551 
3552 		return ent;
3553 	}
3554 
3555 	for_each_tracing_cpu(cpu) {
3556 
3557 		if (ring_buffer_empty_cpu(buffer, cpu))
3558 			continue;
3559 
3560 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3561 
3562 		/*
3563 		 * Pick the entry with the smallest timestamp:
3564 		 */
3565 		if (ent && (!next || ts < next_ts)) {
3566 			next = ent;
3567 			next_cpu = cpu;
3568 			next_ts = ts;
3569 			next_lost = lost_events;
3570 			next_size = iter->ent_size;
3571 		}
3572 	}
3573 
3574 	iter->ent_size = next_size;
3575 
3576 	if (ent_cpu)
3577 		*ent_cpu = next_cpu;
3578 
3579 	if (ent_ts)
3580 		*ent_ts = next_ts;
3581 
3582 	if (missing_events)
3583 		*missing_events = next_lost;
3584 
3585 	return next;
3586 }
3587 
3588 #define STATIC_FMT_BUF_SIZE	128
3589 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3590 
trace_iter_expand_format(struct trace_iterator * iter)3591 char *trace_iter_expand_format(struct trace_iterator *iter)
3592 {
3593 	char *tmp;
3594 
3595 	/*
3596 	 * iter->tr is NULL when used with tp_printk, which makes
3597 	 * this get called where it is not safe to call krealloc().
3598 	 */
3599 	if (!iter->tr || iter->fmt == static_fmt_buf)
3600 		return NULL;
3601 
3602 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3603 		       GFP_KERNEL);
3604 	if (tmp) {
3605 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3606 		iter->fmt = tmp;
3607 	}
3608 
3609 	return tmp;
3610 }
3611 
3612 /* Returns true if the string is safe to dereference from an event */
trace_safe_str(struct trace_iterator * iter,const char * str)3613 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3614 {
3615 	unsigned long addr = (unsigned long)str;
3616 	struct trace_event *trace_event;
3617 	struct trace_event_call *event;
3618 
3619 	/* OK if part of the event data */
3620 	if ((addr >= (unsigned long)iter->ent) &&
3621 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3622 		return true;
3623 
3624 	/* OK if part of the temp seq buffer */
3625 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3626 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3627 		return true;
3628 
3629 	/* Core rodata can not be freed */
3630 	if (is_kernel_rodata(addr))
3631 		return true;
3632 
3633 	if (trace_is_tracepoint_string(str))
3634 		return true;
3635 
3636 	/*
3637 	 * Now this could be a module event, referencing core module
3638 	 * data, which is OK.
3639 	 */
3640 	if (!iter->ent)
3641 		return false;
3642 
3643 	trace_event = ftrace_find_event(iter->ent->type);
3644 	if (!trace_event)
3645 		return false;
3646 
3647 	event = container_of(trace_event, struct trace_event_call, event);
3648 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3649 		return false;
3650 
3651 	/* Would rather have rodata, but this will suffice */
3652 	if (within_module_core(addr, event->module))
3653 		return true;
3654 
3655 	return false;
3656 }
3657 
3658 /**
3659  * ignore_event - Check dereferenced fields while writing to the seq buffer
3660  * @iter: The iterator that holds the seq buffer and the event being printed
3661  *
3662  * At boot up, test_event_printk() will flag any event that dereferences
3663  * a string with "%s" that does exist in the ring buffer. It may still
3664  * be valid, as the string may point to a static string in the kernel
3665  * rodata that never gets freed. But if the string pointer is pointing
3666  * to something that was allocated, there's a chance that it can be freed
3667  * by the time the user reads the trace. This would cause a bad memory
3668  * access by the kernel and possibly crash the system.
3669  *
3670  * This function will check if the event has any fields flagged as needing
3671  * to be checked at runtime and perform those checks.
3672  *
3673  * If it is found that a field is unsafe, it will write into the @iter->seq
3674  * a message stating what was found to be unsafe.
3675  *
3676  * @return: true if the event is unsafe and should be ignored,
3677  *          false otherwise.
3678  */
ignore_event(struct trace_iterator * iter)3679 bool ignore_event(struct trace_iterator *iter)
3680 {
3681 	struct ftrace_event_field *field;
3682 	struct trace_event *trace_event;
3683 	struct trace_event_call *event;
3684 	struct list_head *head;
3685 	struct trace_seq *seq;
3686 	const void *ptr;
3687 
3688 	trace_event = ftrace_find_event(iter->ent->type);
3689 
3690 	seq = &iter->seq;
3691 
3692 	if (!trace_event) {
3693 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3694 		return true;
3695 	}
3696 
3697 	event = container_of(trace_event, struct trace_event_call, event);
3698 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3699 		return false;
3700 
3701 	head = trace_get_fields(event);
3702 	if (!head) {
3703 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3704 				 trace_event_name(event));
3705 		return true;
3706 	}
3707 
3708 	/* Offsets are from the iter->ent that points to the raw event */
3709 	ptr = iter->ent;
3710 
3711 	list_for_each_entry(field, head, link) {
3712 		const char *str;
3713 		bool good;
3714 
3715 		if (!field->needs_test)
3716 			continue;
3717 
3718 		str = *(const char **)(ptr + field->offset);
3719 
3720 		good = trace_safe_str(iter, str);
3721 
3722 		/*
3723 		 * If you hit this warning, it is likely that the
3724 		 * trace event in question used %s on a string that
3725 		 * was saved at the time of the event, but may not be
3726 		 * around when the trace is read. Use __string(),
3727 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3728 		 * instead. See samples/trace_events/trace-events-sample.h
3729 		 * for reference.
3730 		 */
3731 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3732 			      trace_event_name(event), field->name)) {
3733 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3734 					 trace_event_name(event), field->name);
3735 			return true;
3736 		}
3737 	}
3738 	return false;
3739 }
3740 
trace_event_format(struct trace_iterator * iter,const char * fmt)3741 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3742 {
3743 	const char *p, *new_fmt;
3744 	char *q;
3745 
3746 	if (WARN_ON_ONCE(!fmt))
3747 		return fmt;
3748 
3749 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3750 		return fmt;
3751 
3752 	p = fmt;
3753 	new_fmt = q = iter->fmt;
3754 	while (*p) {
3755 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3756 			if (!trace_iter_expand_format(iter))
3757 				return fmt;
3758 
3759 			q += iter->fmt - new_fmt;
3760 			new_fmt = iter->fmt;
3761 		}
3762 
3763 		*q++ = *p++;
3764 
3765 		/* Replace %p with %px */
3766 		if (p[-1] == '%') {
3767 			if (p[0] == '%') {
3768 				*q++ = *p++;
3769 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3770 				*q++ = *p++;
3771 				*q++ = 'x';
3772 			}
3773 		}
3774 	}
3775 	*q = '\0';
3776 
3777 	return new_fmt;
3778 }
3779 
3780 #define STATIC_TEMP_BUF_SIZE	128
3781 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3782 
3783 /* Find the next real entry, without updating the iterator itself */
trace_find_next_entry(struct trace_iterator * iter,int * ent_cpu,u64 * ent_ts)3784 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3785 					  int *ent_cpu, u64 *ent_ts)
3786 {
3787 	/* __find_next_entry will reset ent_size */
3788 	int ent_size = iter->ent_size;
3789 	struct trace_entry *entry;
3790 
3791 	/*
3792 	 * If called from ftrace_dump(), then the iter->temp buffer
3793 	 * will be the static_temp_buf and not created from kmalloc.
3794 	 * If the entry size is greater than the buffer, we can
3795 	 * not save it. Just return NULL in that case. This is only
3796 	 * used to add markers when two consecutive events' time
3797 	 * stamps have a large delta. See trace_print_lat_context()
3798 	 */
3799 	if (iter->temp == static_temp_buf &&
3800 	    STATIC_TEMP_BUF_SIZE < ent_size)
3801 		return NULL;
3802 
3803 	/*
3804 	 * The __find_next_entry() may call peek_next_entry(), which may
3805 	 * call ring_buffer_peek() that may make the contents of iter->ent
3806 	 * undefined. Need to copy iter->ent now.
3807 	 */
3808 	if (iter->ent && iter->ent != iter->temp) {
3809 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3810 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3811 			void *temp;
3812 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3813 			if (!temp)
3814 				return NULL;
3815 			kfree(iter->temp);
3816 			iter->temp = temp;
3817 			iter->temp_size = iter->ent_size;
3818 		}
3819 		memcpy(iter->temp, iter->ent, iter->ent_size);
3820 		iter->ent = iter->temp;
3821 	}
3822 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3823 	/* Put back the original ent_size */
3824 	iter->ent_size = ent_size;
3825 
3826 	return entry;
3827 }
3828 
3829 /* Find the next real entry, and increment the iterator to the next entry */
trace_find_next_entry_inc(struct trace_iterator * iter)3830 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3831 {
3832 	iter->ent = __find_next_entry(iter, &iter->cpu,
3833 				      &iter->lost_events, &iter->ts);
3834 
3835 	if (iter->ent)
3836 		trace_iterator_increment(iter);
3837 
3838 	return iter->ent ? iter : NULL;
3839 }
3840 
trace_consume(struct trace_iterator * iter)3841 static void trace_consume(struct trace_iterator *iter)
3842 {
3843 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3844 			    &iter->lost_events);
3845 }
3846 
s_next(struct seq_file * m,void * v,loff_t * pos)3847 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3848 {
3849 	struct trace_iterator *iter = m->private;
3850 	int i = (int)*pos;
3851 	void *ent;
3852 
3853 	WARN_ON_ONCE(iter->leftover);
3854 
3855 	(*pos)++;
3856 
3857 	/* can't go backwards */
3858 	if (iter->idx > i)
3859 		return NULL;
3860 
3861 	if (iter->idx < 0)
3862 		ent = trace_find_next_entry_inc(iter);
3863 	else
3864 		ent = iter;
3865 
3866 	while (ent && iter->idx < i)
3867 		ent = trace_find_next_entry_inc(iter);
3868 
3869 	iter->pos = *pos;
3870 
3871 	return ent;
3872 }
3873 
tracing_iter_reset(struct trace_iterator * iter,int cpu)3874 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3875 {
3876 	struct ring_buffer_iter *buf_iter;
3877 	unsigned long entries = 0;
3878 	u64 ts;
3879 
3880 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3881 
3882 	buf_iter = trace_buffer_iter(iter, cpu);
3883 	if (!buf_iter)
3884 		return;
3885 
3886 	ring_buffer_iter_reset(buf_iter);
3887 
3888 	/*
3889 	 * We could have the case with the max latency tracers
3890 	 * that a reset never took place on a cpu. This is evident
3891 	 * by the timestamp being before the start of the buffer.
3892 	 */
3893 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3894 		if (ts >= iter->array_buffer->time_start)
3895 			break;
3896 		entries++;
3897 		ring_buffer_iter_advance(buf_iter);
3898 		/* This could be a big loop */
3899 		cond_resched();
3900 	}
3901 
3902 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3903 }
3904 
3905 /*
3906  * The current tracer is copied to avoid a global locking
3907  * all around.
3908  */
s_start(struct seq_file * m,loff_t * pos)3909 static void *s_start(struct seq_file *m, loff_t *pos)
3910 {
3911 	struct trace_iterator *iter = m->private;
3912 	struct trace_array *tr = iter->tr;
3913 	int cpu_file = iter->cpu_file;
3914 	void *p = NULL;
3915 	loff_t l = 0;
3916 	int cpu;
3917 
3918 	mutex_lock(&trace_types_lock);
3919 	if (unlikely(tr->current_trace != iter->trace)) {
3920 		/* Close iter->trace before switching to the new current tracer */
3921 		if (iter->trace->close)
3922 			iter->trace->close(iter);
3923 		iter->trace = tr->current_trace;
3924 		/* Reopen the new current tracer */
3925 		if (iter->trace->open)
3926 			iter->trace->open(iter);
3927 	}
3928 	mutex_unlock(&trace_types_lock);
3929 
3930 #ifdef CONFIG_TRACER_MAX_TRACE
3931 	if (iter->snapshot && iter->trace->use_max_tr)
3932 		return ERR_PTR(-EBUSY);
3933 #endif
3934 
3935 	if (*pos != iter->pos) {
3936 		iter->ent = NULL;
3937 		iter->cpu = 0;
3938 		iter->idx = -1;
3939 
3940 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3941 			for_each_tracing_cpu(cpu)
3942 				tracing_iter_reset(iter, cpu);
3943 		} else
3944 			tracing_iter_reset(iter, cpu_file);
3945 
3946 		iter->leftover = 0;
3947 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3948 			;
3949 
3950 	} else {
3951 		/*
3952 		 * If we overflowed the seq_file before, then we want
3953 		 * to just reuse the trace_seq buffer again.
3954 		 */
3955 		if (iter->leftover)
3956 			p = iter;
3957 		else {
3958 			l = *pos - 1;
3959 			p = s_next(m, p, &l);
3960 		}
3961 	}
3962 
3963 	trace_event_read_lock();
3964 	trace_access_lock(cpu_file);
3965 	return p;
3966 }
3967 
s_stop(struct seq_file * m,void * p)3968 static void s_stop(struct seq_file *m, void *p)
3969 {
3970 	struct trace_iterator *iter = m->private;
3971 
3972 #ifdef CONFIG_TRACER_MAX_TRACE
3973 	if (iter->snapshot && iter->trace->use_max_tr)
3974 		return;
3975 #endif
3976 
3977 	trace_access_unlock(iter->cpu_file);
3978 	trace_event_read_unlock();
3979 }
3980 
3981 static void
get_total_entries_cpu(struct array_buffer * buf,unsigned long * total,unsigned long * entries,int cpu)3982 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3983 		      unsigned long *entries, int cpu)
3984 {
3985 	unsigned long count;
3986 
3987 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3988 	/*
3989 	 * If this buffer has skipped entries, then we hold all
3990 	 * entries for the trace and we need to ignore the
3991 	 * ones before the time stamp.
3992 	 */
3993 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3994 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3995 		/* total is the same as the entries */
3996 		*total = count;
3997 	} else
3998 		*total = count +
3999 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4000 	*entries = count;
4001 }
4002 
4003 static void
get_total_entries(struct array_buffer * buf,unsigned long * total,unsigned long * entries)4004 get_total_entries(struct array_buffer *buf,
4005 		  unsigned long *total, unsigned long *entries)
4006 {
4007 	unsigned long t, e;
4008 	int cpu;
4009 
4010 	*total = 0;
4011 	*entries = 0;
4012 
4013 	for_each_tracing_cpu(cpu) {
4014 		get_total_entries_cpu(buf, &t, &e, cpu);
4015 		*total += t;
4016 		*entries += e;
4017 	}
4018 }
4019 
trace_total_entries_cpu(struct trace_array * tr,int cpu)4020 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4021 {
4022 	unsigned long total, entries;
4023 
4024 	if (!tr)
4025 		tr = &global_trace;
4026 
4027 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4028 
4029 	return entries;
4030 }
4031 
trace_total_entries(struct trace_array * tr)4032 unsigned long trace_total_entries(struct trace_array *tr)
4033 {
4034 	unsigned long total, entries;
4035 
4036 	if (!tr)
4037 		tr = &global_trace;
4038 
4039 	get_total_entries(&tr->array_buffer, &total, &entries);
4040 
4041 	return entries;
4042 }
4043 
print_lat_help_header(struct seq_file * m)4044 static void print_lat_help_header(struct seq_file *m)
4045 {
4046 	seq_puts(m, "#                    _------=> CPU#            \n"
4047 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4048 		    "#                  | / _----=> need-resched    \n"
4049 		    "#                  || / _---=> hardirq/softirq \n"
4050 		    "#                  ||| / _--=> preempt-depth   \n"
4051 		    "#                  |||| / _-=> migrate-disable \n"
4052 		    "#                  ||||| /     delay           \n"
4053 		    "#  cmd     pid     |||||| time  |   caller     \n"
4054 		    "#     \\   /        ||||||  \\    |    /       \n");
4055 }
4056 
print_event_info(struct array_buffer * buf,struct seq_file * m)4057 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4058 {
4059 	unsigned long total;
4060 	unsigned long entries;
4061 
4062 	get_total_entries(buf, &total, &entries);
4063 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4064 		   entries, total, num_online_cpus());
4065 	seq_puts(m, "#\n");
4066 }
4067 
print_func_help_header(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4068 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4069 				   unsigned int flags)
4070 {
4071 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4072 
4073 	print_event_info(buf, m);
4074 
4075 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4076 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4077 }
4078 
print_func_help_header_irq(struct array_buffer * buf,struct seq_file * m,unsigned int flags)4079 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4080 				       unsigned int flags)
4081 {
4082 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4083 	static const char space[] = "            ";
4084 	int prec = tgid ? 12 : 2;
4085 
4086 	print_event_info(buf, m);
4087 
4088 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4089 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4090 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4091 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4092 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4093 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4094 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4095 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4096 }
4097 
4098 void
print_trace_header(struct seq_file * m,struct trace_iterator * iter)4099 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4100 {
4101 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4102 	struct array_buffer *buf = iter->array_buffer;
4103 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4104 	struct tracer *type = iter->trace;
4105 	unsigned long entries;
4106 	unsigned long total;
4107 	const char *name = type->name;
4108 
4109 	get_total_entries(buf, &total, &entries);
4110 
4111 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4112 		   name, init_utsname()->release);
4113 	seq_puts(m, "# -----------------------------------"
4114 		 "---------------------------------\n");
4115 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4116 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4117 		   nsecs_to_usecs(data->saved_latency),
4118 		   entries,
4119 		   total,
4120 		   buf->cpu,
4121 		   preempt_model_str(),
4122 		   /* These are reserved for later use */
4123 		   0, 0, 0, 0);
4124 #ifdef CONFIG_SMP
4125 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4126 #else
4127 	seq_puts(m, ")\n");
4128 #endif
4129 	seq_puts(m, "#    -----------------\n");
4130 	seq_printf(m, "#    | task: %.16s-%d "
4131 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4132 		   data->comm, data->pid,
4133 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4134 		   data->policy, data->rt_priority);
4135 	seq_puts(m, "#    -----------------\n");
4136 
4137 	if (data->critical_start) {
4138 		seq_puts(m, "#  => started at: ");
4139 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4140 		trace_print_seq(m, &iter->seq);
4141 		seq_puts(m, "\n#  => ended at:   ");
4142 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4143 		trace_print_seq(m, &iter->seq);
4144 		seq_puts(m, "\n#\n");
4145 	}
4146 
4147 	seq_puts(m, "#\n");
4148 }
4149 
test_cpu_buff_start(struct trace_iterator * iter)4150 static void test_cpu_buff_start(struct trace_iterator *iter)
4151 {
4152 	struct trace_seq *s = &iter->seq;
4153 	struct trace_array *tr = iter->tr;
4154 
4155 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4156 		return;
4157 
4158 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4159 		return;
4160 
4161 	if (cpumask_available(iter->started) &&
4162 	    cpumask_test_cpu(iter->cpu, iter->started))
4163 		return;
4164 
4165 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4166 		return;
4167 
4168 	if (cpumask_available(iter->started))
4169 		cpumask_set_cpu(iter->cpu, iter->started);
4170 
4171 	/* Don't print started cpu buffer for the first entry of the trace */
4172 	if (iter->idx > 1)
4173 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4174 				iter->cpu);
4175 }
4176 
print_trace_fmt(struct trace_iterator * iter)4177 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4178 {
4179 	struct trace_array *tr = iter->tr;
4180 	struct trace_seq *s = &iter->seq;
4181 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4182 	struct trace_entry *entry;
4183 	struct trace_event *event;
4184 
4185 	entry = iter->ent;
4186 
4187 	test_cpu_buff_start(iter);
4188 
4189 	event = ftrace_find_event(entry->type);
4190 
4191 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4192 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4193 			trace_print_lat_context(iter);
4194 		else
4195 			trace_print_context(iter);
4196 	}
4197 
4198 	if (trace_seq_has_overflowed(s))
4199 		return TRACE_TYPE_PARTIAL_LINE;
4200 
4201 	if (event) {
4202 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4203 			return print_event_fields(iter, event);
4204 		/*
4205 		 * For TRACE_EVENT() events, the print_fmt is not
4206 		 * safe to use if the array has delta offsets
4207 		 * Force printing via the fields.
4208 		 */
4209 		if ((tr->text_delta || tr->data_delta) &&
4210 		    event->type > __TRACE_LAST_TYPE)
4211 			return print_event_fields(iter, event);
4212 
4213 		return event->funcs->trace(iter, sym_flags, event);
4214 	}
4215 
4216 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4217 
4218 	return trace_handle_return(s);
4219 }
4220 
print_raw_fmt(struct trace_iterator * iter)4221 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4222 {
4223 	struct trace_array *tr = iter->tr;
4224 	struct trace_seq *s = &iter->seq;
4225 	struct trace_entry *entry;
4226 	struct trace_event *event;
4227 
4228 	entry = iter->ent;
4229 
4230 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4231 		trace_seq_printf(s, "%d %d %llu ",
4232 				 entry->pid, iter->cpu, iter->ts);
4233 
4234 	if (trace_seq_has_overflowed(s))
4235 		return TRACE_TYPE_PARTIAL_LINE;
4236 
4237 	event = ftrace_find_event(entry->type);
4238 	if (event)
4239 		return event->funcs->raw(iter, 0, event);
4240 
4241 	trace_seq_printf(s, "%d ?\n", entry->type);
4242 
4243 	return trace_handle_return(s);
4244 }
4245 
print_hex_fmt(struct trace_iterator * iter)4246 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4247 {
4248 	struct trace_array *tr = iter->tr;
4249 	struct trace_seq *s = &iter->seq;
4250 	unsigned char newline = '\n';
4251 	struct trace_entry *entry;
4252 	struct trace_event *event;
4253 
4254 	entry = iter->ent;
4255 
4256 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4257 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4258 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4259 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4260 		if (trace_seq_has_overflowed(s))
4261 			return TRACE_TYPE_PARTIAL_LINE;
4262 	}
4263 
4264 	event = ftrace_find_event(entry->type);
4265 	if (event) {
4266 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4267 		if (ret != TRACE_TYPE_HANDLED)
4268 			return ret;
4269 	}
4270 
4271 	SEQ_PUT_FIELD(s, newline);
4272 
4273 	return trace_handle_return(s);
4274 }
4275 
print_bin_fmt(struct trace_iterator * iter)4276 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4277 {
4278 	struct trace_array *tr = iter->tr;
4279 	struct trace_seq *s = &iter->seq;
4280 	struct trace_entry *entry;
4281 	struct trace_event *event;
4282 
4283 	entry = iter->ent;
4284 
4285 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4286 		SEQ_PUT_FIELD(s, entry->pid);
4287 		SEQ_PUT_FIELD(s, iter->cpu);
4288 		SEQ_PUT_FIELD(s, iter->ts);
4289 		if (trace_seq_has_overflowed(s))
4290 			return TRACE_TYPE_PARTIAL_LINE;
4291 	}
4292 
4293 	event = ftrace_find_event(entry->type);
4294 	return event ? event->funcs->binary(iter, 0, event) :
4295 		TRACE_TYPE_HANDLED;
4296 }
4297 
trace_empty(struct trace_iterator * iter)4298 int trace_empty(struct trace_iterator *iter)
4299 {
4300 	struct ring_buffer_iter *buf_iter;
4301 	int cpu;
4302 
4303 	/* If we are looking at one CPU buffer, only check that one */
4304 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4305 		cpu = iter->cpu_file;
4306 		buf_iter = trace_buffer_iter(iter, cpu);
4307 		if (buf_iter) {
4308 			if (!ring_buffer_iter_empty(buf_iter))
4309 				return 0;
4310 		} else {
4311 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4312 				return 0;
4313 		}
4314 		return 1;
4315 	}
4316 
4317 	for_each_tracing_cpu(cpu) {
4318 		buf_iter = trace_buffer_iter(iter, cpu);
4319 		if (buf_iter) {
4320 			if (!ring_buffer_iter_empty(buf_iter))
4321 				return 0;
4322 		} else {
4323 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4324 				return 0;
4325 		}
4326 	}
4327 
4328 	return 1;
4329 }
4330 
4331 /*  Called with trace_event_read_lock() held. */
print_trace_line(struct trace_iterator * iter)4332 enum print_line_t print_trace_line(struct trace_iterator *iter)
4333 {
4334 	struct trace_array *tr = iter->tr;
4335 	unsigned long trace_flags = tr->trace_flags;
4336 	enum print_line_t ret;
4337 
4338 	if (iter->lost_events) {
4339 		if (iter->lost_events == (unsigned long)-1)
4340 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4341 					 iter->cpu);
4342 		else
4343 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4344 					 iter->cpu, iter->lost_events);
4345 		if (trace_seq_has_overflowed(&iter->seq))
4346 			return TRACE_TYPE_PARTIAL_LINE;
4347 	}
4348 
4349 	if (iter->trace && iter->trace->print_line) {
4350 		ret = iter->trace->print_line(iter);
4351 		if (ret != TRACE_TYPE_UNHANDLED)
4352 			return ret;
4353 	}
4354 
4355 	if (iter->ent->type == TRACE_BPUTS &&
4356 			trace_flags & TRACE_ITER_PRINTK &&
4357 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4358 		return trace_print_bputs_msg_only(iter);
4359 
4360 	if (iter->ent->type == TRACE_BPRINT &&
4361 			trace_flags & TRACE_ITER_PRINTK &&
4362 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4363 		return trace_print_bprintk_msg_only(iter);
4364 
4365 	if (iter->ent->type == TRACE_PRINT &&
4366 			trace_flags & TRACE_ITER_PRINTK &&
4367 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4368 		return trace_print_printk_msg_only(iter);
4369 
4370 	if (trace_flags & TRACE_ITER_BIN)
4371 		return print_bin_fmt(iter);
4372 
4373 	if (trace_flags & TRACE_ITER_HEX)
4374 		return print_hex_fmt(iter);
4375 
4376 	if (trace_flags & TRACE_ITER_RAW)
4377 		return print_raw_fmt(iter);
4378 
4379 	return print_trace_fmt(iter);
4380 }
4381 
trace_latency_header(struct seq_file * m)4382 void trace_latency_header(struct seq_file *m)
4383 {
4384 	struct trace_iterator *iter = m->private;
4385 	struct trace_array *tr = iter->tr;
4386 
4387 	/* print nothing if the buffers are empty */
4388 	if (trace_empty(iter))
4389 		return;
4390 
4391 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4392 		print_trace_header(m, iter);
4393 
4394 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4395 		print_lat_help_header(m);
4396 }
4397 
trace_default_header(struct seq_file * m)4398 void trace_default_header(struct seq_file *m)
4399 {
4400 	struct trace_iterator *iter = m->private;
4401 	struct trace_array *tr = iter->tr;
4402 	unsigned long trace_flags = tr->trace_flags;
4403 
4404 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4405 		return;
4406 
4407 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4408 		/* print nothing if the buffers are empty */
4409 		if (trace_empty(iter))
4410 			return;
4411 		print_trace_header(m, iter);
4412 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4413 			print_lat_help_header(m);
4414 	} else {
4415 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4416 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4417 				print_func_help_header_irq(iter->array_buffer,
4418 							   m, trace_flags);
4419 			else
4420 				print_func_help_header(iter->array_buffer, m,
4421 						       trace_flags);
4422 		}
4423 	}
4424 }
4425 
test_ftrace_alive(struct seq_file * m)4426 static void test_ftrace_alive(struct seq_file *m)
4427 {
4428 	if (!ftrace_is_dead())
4429 		return;
4430 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4431 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4432 }
4433 
4434 #ifdef CONFIG_TRACER_MAX_TRACE
show_snapshot_main_help(struct seq_file * m)4435 static void show_snapshot_main_help(struct seq_file *m)
4436 {
4437 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4438 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4439 		    "#                      Takes a snapshot of the main buffer.\n"
4440 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4441 		    "#                      (Doesn't have to be '2' works with any number that\n"
4442 		    "#                       is not a '0' or '1')\n");
4443 }
4444 
show_snapshot_percpu_help(struct seq_file * m)4445 static void show_snapshot_percpu_help(struct seq_file *m)
4446 {
4447 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4448 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4449 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4450 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4451 #else
4452 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4453 		    "#                     Must use main snapshot file to allocate.\n");
4454 #endif
4455 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4456 		    "#                      (Doesn't have to be '2' works with any number that\n"
4457 		    "#                       is not a '0' or '1')\n");
4458 }
4459 
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4460 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4461 {
4462 	if (iter->tr->allocated_snapshot)
4463 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4464 	else
4465 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4466 
4467 	seq_puts(m, "# Snapshot commands:\n");
4468 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4469 		show_snapshot_main_help(m);
4470 	else
4471 		show_snapshot_percpu_help(m);
4472 }
4473 #else
4474 /* Should never be called */
print_snapshot_help(struct seq_file * m,struct trace_iterator * iter)4475 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4476 #endif
4477 
s_show(struct seq_file * m,void * v)4478 static int s_show(struct seq_file *m, void *v)
4479 {
4480 	struct trace_iterator *iter = v;
4481 	int ret;
4482 
4483 	if (iter->ent == NULL) {
4484 		if (iter->tr) {
4485 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4486 			seq_puts(m, "#\n");
4487 			test_ftrace_alive(m);
4488 		}
4489 		if (iter->snapshot && trace_empty(iter))
4490 			print_snapshot_help(m, iter);
4491 		else if (iter->trace && iter->trace->print_header)
4492 			iter->trace->print_header(m);
4493 		else
4494 			trace_default_header(m);
4495 
4496 	} else if (iter->leftover) {
4497 		/*
4498 		 * If we filled the seq_file buffer earlier, we
4499 		 * want to just show it now.
4500 		 */
4501 		ret = trace_print_seq(m, &iter->seq);
4502 
4503 		/* ret should this time be zero, but you never know */
4504 		iter->leftover = ret;
4505 
4506 	} else {
4507 		ret = print_trace_line(iter);
4508 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4509 			iter->seq.full = 0;
4510 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4511 		}
4512 		ret = trace_print_seq(m, &iter->seq);
4513 		/*
4514 		 * If we overflow the seq_file buffer, then it will
4515 		 * ask us for this data again at start up.
4516 		 * Use that instead.
4517 		 *  ret is 0 if seq_file write succeeded.
4518 		 *        -1 otherwise.
4519 		 */
4520 		iter->leftover = ret;
4521 	}
4522 
4523 	return 0;
4524 }
4525 
4526 /*
4527  * Should be used after trace_array_get(), trace_types_lock
4528  * ensures that i_cdev was already initialized.
4529  */
tracing_get_cpu(struct inode * inode)4530 static inline int tracing_get_cpu(struct inode *inode)
4531 {
4532 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4533 		return (long)inode->i_cdev - 1;
4534 	return RING_BUFFER_ALL_CPUS;
4535 }
4536 
4537 static const struct seq_operations tracer_seq_ops = {
4538 	.start		= s_start,
4539 	.next		= s_next,
4540 	.stop		= s_stop,
4541 	.show		= s_show,
4542 };
4543 
4544 /*
4545  * Note, as iter itself can be allocated and freed in different
4546  * ways, this function is only used to free its content, and not
4547  * the iterator itself. The only requirement to all the allocations
4548  * is that it must zero all fields (kzalloc), as freeing works with
4549  * ethier allocated content or NULL.
4550  */
free_trace_iter_content(struct trace_iterator * iter)4551 static void free_trace_iter_content(struct trace_iterator *iter)
4552 {
4553 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4554 	if (iter->fmt != static_fmt_buf)
4555 		kfree(iter->fmt);
4556 
4557 	kfree(iter->temp);
4558 	kfree(iter->buffer_iter);
4559 	mutex_destroy(&iter->mutex);
4560 	free_cpumask_var(iter->started);
4561 }
4562 
4563 static struct trace_iterator *
__tracing_open(struct inode * inode,struct file * file,bool snapshot)4564 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4565 {
4566 	struct trace_array *tr = inode->i_private;
4567 	struct trace_iterator *iter;
4568 	int cpu;
4569 
4570 	if (tracing_disabled)
4571 		return ERR_PTR(-ENODEV);
4572 
4573 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4574 	if (!iter)
4575 		return ERR_PTR(-ENOMEM);
4576 
4577 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4578 				    GFP_KERNEL);
4579 	if (!iter->buffer_iter)
4580 		goto release;
4581 
4582 	/*
4583 	 * trace_find_next_entry() may need to save off iter->ent.
4584 	 * It will place it into the iter->temp buffer. As most
4585 	 * events are less than 128, allocate a buffer of that size.
4586 	 * If one is greater, then trace_find_next_entry() will
4587 	 * allocate a new buffer to adjust for the bigger iter->ent.
4588 	 * It's not critical if it fails to get allocated here.
4589 	 */
4590 	iter->temp = kmalloc(128, GFP_KERNEL);
4591 	if (iter->temp)
4592 		iter->temp_size = 128;
4593 
4594 	/*
4595 	 * trace_event_printf() may need to modify given format
4596 	 * string to replace %p with %px so that it shows real address
4597 	 * instead of hash value. However, that is only for the event
4598 	 * tracing, other tracer may not need. Defer the allocation
4599 	 * until it is needed.
4600 	 */
4601 	iter->fmt = NULL;
4602 	iter->fmt_size = 0;
4603 
4604 	mutex_lock(&trace_types_lock);
4605 	iter->trace = tr->current_trace;
4606 
4607 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4608 		goto fail;
4609 
4610 	iter->tr = tr;
4611 
4612 #ifdef CONFIG_TRACER_MAX_TRACE
4613 	/* Currently only the top directory has a snapshot */
4614 	if (tr->current_trace->print_max || snapshot)
4615 		iter->array_buffer = &tr->max_buffer;
4616 	else
4617 #endif
4618 		iter->array_buffer = &tr->array_buffer;
4619 	iter->snapshot = snapshot;
4620 	iter->pos = -1;
4621 	iter->cpu_file = tracing_get_cpu(inode);
4622 	mutex_init(&iter->mutex);
4623 
4624 	/* Notify the tracer early; before we stop tracing. */
4625 	if (iter->trace->open)
4626 		iter->trace->open(iter);
4627 
4628 	/* Annotate start of buffers if we had overruns */
4629 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4630 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4631 
4632 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4633 	if (trace_clocks[tr->clock_id].in_ns)
4634 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4635 
4636 	/*
4637 	 * If pause-on-trace is enabled, then stop the trace while
4638 	 * dumping, unless this is the "snapshot" file
4639 	 */
4640 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4641 		tracing_stop_tr(tr);
4642 
4643 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4644 		for_each_tracing_cpu(cpu) {
4645 			iter->buffer_iter[cpu] =
4646 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4647 							 cpu, GFP_KERNEL);
4648 		}
4649 		ring_buffer_read_prepare_sync();
4650 		for_each_tracing_cpu(cpu) {
4651 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4652 			tracing_iter_reset(iter, cpu);
4653 		}
4654 	} else {
4655 		cpu = iter->cpu_file;
4656 		iter->buffer_iter[cpu] =
4657 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4658 						 cpu, GFP_KERNEL);
4659 		ring_buffer_read_prepare_sync();
4660 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4661 		tracing_iter_reset(iter, cpu);
4662 	}
4663 
4664 	mutex_unlock(&trace_types_lock);
4665 
4666 	return iter;
4667 
4668  fail:
4669 	mutex_unlock(&trace_types_lock);
4670 	free_trace_iter_content(iter);
4671 release:
4672 	seq_release_private(inode, file);
4673 	return ERR_PTR(-ENOMEM);
4674 }
4675 
tracing_open_generic(struct inode * inode,struct file * filp)4676 int tracing_open_generic(struct inode *inode, struct file *filp)
4677 {
4678 	int ret;
4679 
4680 	ret = tracing_check_open_get_tr(NULL);
4681 	if (ret)
4682 		return ret;
4683 
4684 	filp->private_data = inode->i_private;
4685 	return 0;
4686 }
4687 
tracing_is_disabled(void)4688 bool tracing_is_disabled(void)
4689 {
4690 	return (tracing_disabled) ? true: false;
4691 }
4692 
4693 /*
4694  * Open and update trace_array ref count.
4695  * Must have the current trace_array passed to it.
4696  */
tracing_open_generic_tr(struct inode * inode,struct file * filp)4697 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4698 {
4699 	struct trace_array *tr = inode->i_private;
4700 	int ret;
4701 
4702 	ret = tracing_check_open_get_tr(tr);
4703 	if (ret)
4704 		return ret;
4705 
4706 	filp->private_data = inode->i_private;
4707 
4708 	return 0;
4709 }
4710 
4711 /*
4712  * The private pointer of the inode is the trace_event_file.
4713  * Update the tr ref count associated to it.
4714  */
tracing_open_file_tr(struct inode * inode,struct file * filp)4715 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4716 {
4717 	struct trace_event_file *file = inode->i_private;
4718 	int ret;
4719 
4720 	ret = tracing_check_open_get_tr(file->tr);
4721 	if (ret)
4722 		return ret;
4723 
4724 	mutex_lock(&event_mutex);
4725 
4726 	/* Fail if the file is marked for removal */
4727 	if (file->flags & EVENT_FILE_FL_FREED) {
4728 		trace_array_put(file->tr);
4729 		ret = -ENODEV;
4730 	} else {
4731 		event_file_get(file);
4732 	}
4733 
4734 	mutex_unlock(&event_mutex);
4735 	if (ret)
4736 		return ret;
4737 
4738 	filp->private_data = inode->i_private;
4739 
4740 	return 0;
4741 }
4742 
tracing_release_file_tr(struct inode * inode,struct file * filp)4743 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4744 {
4745 	struct trace_event_file *file = inode->i_private;
4746 
4747 	trace_array_put(file->tr);
4748 	event_file_put(file);
4749 
4750 	return 0;
4751 }
4752 
tracing_single_release_file_tr(struct inode * inode,struct file * filp)4753 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4754 {
4755 	tracing_release_file_tr(inode, filp);
4756 	return single_release(inode, filp);
4757 }
4758 
tracing_mark_open(struct inode * inode,struct file * filp)4759 static int tracing_mark_open(struct inode *inode, struct file *filp)
4760 {
4761 	stream_open(inode, filp);
4762 	return tracing_open_generic_tr(inode, filp);
4763 }
4764 
tracing_release(struct inode * inode,struct file * file)4765 static int tracing_release(struct inode *inode, struct file *file)
4766 {
4767 	struct trace_array *tr = inode->i_private;
4768 	struct seq_file *m = file->private_data;
4769 	struct trace_iterator *iter;
4770 	int cpu;
4771 
4772 	if (!(file->f_mode & FMODE_READ)) {
4773 		trace_array_put(tr);
4774 		return 0;
4775 	}
4776 
4777 	/* Writes do not use seq_file */
4778 	iter = m->private;
4779 	mutex_lock(&trace_types_lock);
4780 
4781 	for_each_tracing_cpu(cpu) {
4782 		if (iter->buffer_iter[cpu])
4783 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4784 	}
4785 
4786 	if (iter->trace && iter->trace->close)
4787 		iter->trace->close(iter);
4788 
4789 	if (!iter->snapshot && tr->stop_count)
4790 		/* reenable tracing if it was previously enabled */
4791 		tracing_start_tr(tr);
4792 
4793 	__trace_array_put(tr);
4794 
4795 	mutex_unlock(&trace_types_lock);
4796 
4797 	free_trace_iter_content(iter);
4798 	seq_release_private(inode, file);
4799 
4800 	return 0;
4801 }
4802 
tracing_release_generic_tr(struct inode * inode,struct file * file)4803 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4804 {
4805 	struct trace_array *tr = inode->i_private;
4806 
4807 	trace_array_put(tr);
4808 	return 0;
4809 }
4810 
tracing_single_release_tr(struct inode * inode,struct file * file)4811 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4812 {
4813 	struct trace_array *tr = inode->i_private;
4814 
4815 	trace_array_put(tr);
4816 
4817 	return single_release(inode, file);
4818 }
4819 
tracing_open(struct inode * inode,struct file * file)4820 static int tracing_open(struct inode *inode, struct file *file)
4821 {
4822 	struct trace_array *tr = inode->i_private;
4823 	struct trace_iterator *iter;
4824 	int ret;
4825 
4826 	ret = tracing_check_open_get_tr(tr);
4827 	if (ret)
4828 		return ret;
4829 
4830 	/* If this file was open for write, then erase contents */
4831 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4832 		int cpu = tracing_get_cpu(inode);
4833 		struct array_buffer *trace_buf = &tr->array_buffer;
4834 
4835 #ifdef CONFIG_TRACER_MAX_TRACE
4836 		if (tr->current_trace->print_max)
4837 			trace_buf = &tr->max_buffer;
4838 #endif
4839 
4840 		if (cpu == RING_BUFFER_ALL_CPUS)
4841 			tracing_reset_online_cpus(trace_buf);
4842 		else
4843 			tracing_reset_cpu(trace_buf, cpu);
4844 	}
4845 
4846 	if (file->f_mode & FMODE_READ) {
4847 		iter = __tracing_open(inode, file, false);
4848 		if (IS_ERR(iter))
4849 			ret = PTR_ERR(iter);
4850 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4851 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4852 	}
4853 
4854 	if (ret < 0)
4855 		trace_array_put(tr);
4856 
4857 	return ret;
4858 }
4859 
4860 /*
4861  * Some tracers are not suitable for instance buffers.
4862  * A tracer is always available for the global array (toplevel)
4863  * or if it explicitly states that it is.
4864  */
4865 static bool
trace_ok_for_array(struct tracer * t,struct trace_array * tr)4866 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4867 {
4868 #ifdef CONFIG_TRACER_SNAPSHOT
4869 	/* arrays with mapped buffer range do not have snapshots */
4870 	if (tr->range_addr_start && t->use_max_tr)
4871 		return false;
4872 #endif
4873 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4874 }
4875 
4876 /* Find the next tracer that this trace array may use */
4877 static struct tracer *
get_tracer_for_array(struct trace_array * tr,struct tracer * t)4878 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4879 {
4880 	while (t && !trace_ok_for_array(t, tr))
4881 		t = t->next;
4882 
4883 	return t;
4884 }
4885 
4886 static void *
t_next(struct seq_file * m,void * v,loff_t * pos)4887 t_next(struct seq_file *m, void *v, loff_t *pos)
4888 {
4889 	struct trace_array *tr = m->private;
4890 	struct tracer *t = v;
4891 
4892 	(*pos)++;
4893 
4894 	if (t)
4895 		t = get_tracer_for_array(tr, t->next);
4896 
4897 	return t;
4898 }
4899 
t_start(struct seq_file * m,loff_t * pos)4900 static void *t_start(struct seq_file *m, loff_t *pos)
4901 {
4902 	struct trace_array *tr = m->private;
4903 	struct tracer *t;
4904 	loff_t l = 0;
4905 
4906 	mutex_lock(&trace_types_lock);
4907 
4908 	t = get_tracer_for_array(tr, trace_types);
4909 	for (; t && l < *pos; t = t_next(m, t, &l))
4910 			;
4911 
4912 	return t;
4913 }
4914 
t_stop(struct seq_file * m,void * p)4915 static void t_stop(struct seq_file *m, void *p)
4916 {
4917 	mutex_unlock(&trace_types_lock);
4918 }
4919 
t_show(struct seq_file * m,void * v)4920 static int t_show(struct seq_file *m, void *v)
4921 {
4922 	struct tracer *t = v;
4923 
4924 	if (!t)
4925 		return 0;
4926 
4927 	seq_puts(m, t->name);
4928 	if (t->next)
4929 		seq_putc(m, ' ');
4930 	else
4931 		seq_putc(m, '\n');
4932 
4933 	return 0;
4934 }
4935 
4936 static const struct seq_operations show_traces_seq_ops = {
4937 	.start		= t_start,
4938 	.next		= t_next,
4939 	.stop		= t_stop,
4940 	.show		= t_show,
4941 };
4942 
show_traces_open(struct inode * inode,struct file * file)4943 static int show_traces_open(struct inode *inode, struct file *file)
4944 {
4945 	struct trace_array *tr = inode->i_private;
4946 	struct seq_file *m;
4947 	int ret;
4948 
4949 	ret = tracing_check_open_get_tr(tr);
4950 	if (ret)
4951 		return ret;
4952 
4953 	ret = seq_open(file, &show_traces_seq_ops);
4954 	if (ret) {
4955 		trace_array_put(tr);
4956 		return ret;
4957 	}
4958 
4959 	m = file->private_data;
4960 	m->private = tr;
4961 
4962 	return 0;
4963 }
4964 
tracing_seq_release(struct inode * inode,struct file * file)4965 static int tracing_seq_release(struct inode *inode, struct file *file)
4966 {
4967 	struct trace_array *tr = inode->i_private;
4968 
4969 	trace_array_put(tr);
4970 	return seq_release(inode, file);
4971 }
4972 
4973 static ssize_t
tracing_write_stub(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)4974 tracing_write_stub(struct file *filp, const char __user *ubuf,
4975 		   size_t count, loff_t *ppos)
4976 {
4977 	return count;
4978 }
4979 
tracing_lseek(struct file * file,loff_t offset,int whence)4980 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4981 {
4982 	int ret;
4983 
4984 	if (file->f_mode & FMODE_READ)
4985 		ret = seq_lseek(file, offset, whence);
4986 	else
4987 		file->f_pos = ret = 0;
4988 
4989 	return ret;
4990 }
4991 
4992 static const struct file_operations tracing_fops = {
4993 	.open		= tracing_open,
4994 	.read		= seq_read,
4995 	.read_iter	= seq_read_iter,
4996 	.splice_read	= copy_splice_read,
4997 	.write		= tracing_write_stub,
4998 	.llseek		= tracing_lseek,
4999 	.release	= tracing_release,
5000 };
5001 
5002 static const struct file_operations show_traces_fops = {
5003 	.open		= show_traces_open,
5004 	.read		= seq_read,
5005 	.llseek		= seq_lseek,
5006 	.release	= tracing_seq_release,
5007 };
5008 
5009 static ssize_t
tracing_cpumask_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)5010 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5011 		     size_t count, loff_t *ppos)
5012 {
5013 	struct trace_array *tr = file_inode(filp)->i_private;
5014 	char *mask_str;
5015 	int len;
5016 
5017 	len = snprintf(NULL, 0, "%*pb\n",
5018 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5019 	mask_str = kmalloc(len, GFP_KERNEL);
5020 	if (!mask_str)
5021 		return -ENOMEM;
5022 
5023 	len = snprintf(mask_str, len, "%*pb\n",
5024 		       cpumask_pr_args(tr->tracing_cpumask));
5025 	if (len >= count) {
5026 		count = -EINVAL;
5027 		goto out_err;
5028 	}
5029 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5030 
5031 out_err:
5032 	kfree(mask_str);
5033 
5034 	return count;
5035 }
5036 
tracing_set_cpumask(struct trace_array * tr,cpumask_var_t tracing_cpumask_new)5037 int tracing_set_cpumask(struct trace_array *tr,
5038 			cpumask_var_t tracing_cpumask_new)
5039 {
5040 	int cpu;
5041 
5042 	if (!tr)
5043 		return -EINVAL;
5044 
5045 	local_irq_disable();
5046 	arch_spin_lock(&tr->max_lock);
5047 	for_each_tracing_cpu(cpu) {
5048 		/*
5049 		 * Increase/decrease the disabled counter if we are
5050 		 * about to flip a bit in the cpumask:
5051 		 */
5052 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5053 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5054 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5055 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5056 #ifdef CONFIG_TRACER_MAX_TRACE
5057 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5058 #endif
5059 		}
5060 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5061 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5062 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5063 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5064 #ifdef CONFIG_TRACER_MAX_TRACE
5065 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5066 #endif
5067 		}
5068 	}
5069 	arch_spin_unlock(&tr->max_lock);
5070 	local_irq_enable();
5071 
5072 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5073 
5074 	return 0;
5075 }
5076 
5077 static ssize_t
tracing_cpumask_write(struct file * filp,const char __user * ubuf,size_t count,loff_t * ppos)5078 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5079 		      size_t count, loff_t *ppos)
5080 {
5081 	struct trace_array *tr = file_inode(filp)->i_private;
5082 	cpumask_var_t tracing_cpumask_new;
5083 	int err;
5084 
5085 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5086 		return -EINVAL;
5087 
5088 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5089 		return -ENOMEM;
5090 
5091 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5092 	if (err)
5093 		goto err_free;
5094 
5095 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5096 	if (err)
5097 		goto err_free;
5098 
5099 	free_cpumask_var(tracing_cpumask_new);
5100 
5101 	return count;
5102 
5103 err_free:
5104 	free_cpumask_var(tracing_cpumask_new);
5105 
5106 	return err;
5107 }
5108 
5109 static const struct file_operations tracing_cpumask_fops = {
5110 	.open		= tracing_open_generic_tr,
5111 	.read		= tracing_cpumask_read,
5112 	.write		= tracing_cpumask_write,
5113 	.release	= tracing_release_generic_tr,
5114 	.llseek		= generic_file_llseek,
5115 };
5116 
tracing_trace_options_show(struct seq_file * m,void * v)5117 static int tracing_trace_options_show(struct seq_file *m, void *v)
5118 {
5119 	struct tracer_opt *trace_opts;
5120 	struct trace_array *tr = m->private;
5121 	u32 tracer_flags;
5122 	int i;
5123 
5124 	guard(mutex)(&trace_types_lock);
5125 
5126 	tracer_flags = tr->current_trace->flags->val;
5127 	trace_opts = tr->current_trace->flags->opts;
5128 
5129 	for (i = 0; trace_options[i]; i++) {
5130 		if (tr->trace_flags & (1 << i))
5131 			seq_printf(m, "%s\n", trace_options[i]);
5132 		else
5133 			seq_printf(m, "no%s\n", trace_options[i]);
5134 	}
5135 
5136 	for (i = 0; trace_opts[i].name; i++) {
5137 		if (tracer_flags & trace_opts[i].bit)
5138 			seq_printf(m, "%s\n", trace_opts[i].name);
5139 		else
5140 			seq_printf(m, "no%s\n", trace_opts[i].name);
5141 	}
5142 
5143 	return 0;
5144 }
5145 
__set_tracer_option(struct trace_array * tr,struct tracer_flags * tracer_flags,struct tracer_opt * opts,int neg)5146 static int __set_tracer_option(struct trace_array *tr,
5147 			       struct tracer_flags *tracer_flags,
5148 			       struct tracer_opt *opts, int neg)
5149 {
5150 	struct tracer *trace = tracer_flags->trace;
5151 	int ret;
5152 
5153 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5154 	if (ret)
5155 		return ret;
5156 
5157 	if (neg)
5158 		tracer_flags->val &= ~opts->bit;
5159 	else
5160 		tracer_flags->val |= opts->bit;
5161 	return 0;
5162 }
5163 
5164 /* Try to assign a tracer specific option */
set_tracer_option(struct trace_array * tr,char * cmp,int neg)5165 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5166 {
5167 	struct tracer *trace = tr->current_trace;
5168 	struct tracer_flags *tracer_flags = trace->flags;
5169 	struct tracer_opt *opts = NULL;
5170 	int i;
5171 
5172 	for (i = 0; tracer_flags->opts[i].name; i++) {
5173 		opts = &tracer_flags->opts[i];
5174 
5175 		if (strcmp(cmp, opts->name) == 0)
5176 			return __set_tracer_option(tr, trace->flags, opts, neg);
5177 	}
5178 
5179 	return -EINVAL;
5180 }
5181 
5182 /* Some tracers require overwrite to stay enabled */
trace_keep_overwrite(struct tracer * tracer,u32 mask,int set)5183 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5184 {
5185 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5186 		return -1;
5187 
5188 	return 0;
5189 }
5190 
set_tracer_flag(struct trace_array * tr,unsigned int mask,int enabled)5191 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5192 {
5193 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5194 	    (mask == TRACE_ITER_RECORD_CMD) ||
5195 	    (mask == TRACE_ITER_TRACE_PRINTK))
5196 		lockdep_assert_held(&event_mutex);
5197 
5198 	/* do nothing if flag is already set */
5199 	if (!!(tr->trace_flags & mask) == !!enabled)
5200 		return 0;
5201 
5202 	/* Give the tracer a chance to approve the change */
5203 	if (tr->current_trace->flag_changed)
5204 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5205 			return -EINVAL;
5206 
5207 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5208 		if (enabled) {
5209 			update_printk_trace(tr);
5210 		} else {
5211 			/*
5212 			 * The global_trace cannot clear this.
5213 			 * It's flag only gets cleared if another instance sets it.
5214 			 */
5215 			if (printk_trace == &global_trace)
5216 				return -EINVAL;
5217 			/*
5218 			 * An instance must always have it set.
5219 			 * by default, that's the global_trace instane.
5220 			 */
5221 			if (printk_trace == tr)
5222 				update_printk_trace(&global_trace);
5223 		}
5224 	}
5225 
5226 	if (enabled)
5227 		tr->trace_flags |= mask;
5228 	else
5229 		tr->trace_flags &= ~mask;
5230 
5231 	if (mask == TRACE_ITER_RECORD_CMD)
5232 		trace_event_enable_cmd_record(enabled);
5233 
5234 	if (mask == TRACE_ITER_RECORD_TGID) {
5235 
5236 		if (trace_alloc_tgid_map() < 0) {
5237 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5238 			return -ENOMEM;
5239 		}
5240 
5241 		trace_event_enable_tgid_record(enabled);
5242 	}
5243 
5244 	if (mask == TRACE_ITER_EVENT_FORK)
5245 		trace_event_follow_fork(tr, enabled);
5246 
5247 	if (mask == TRACE_ITER_FUNC_FORK)
5248 		ftrace_pid_follow_fork(tr, enabled);
5249 
5250 	if (mask == TRACE_ITER_OVERWRITE) {
5251 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5252 #ifdef CONFIG_TRACER_MAX_TRACE
5253 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5254 #endif
5255 	}
5256 
5257 	if (mask == TRACE_ITER_PRINTK) {
5258 		trace_printk_start_stop_comm(enabled);
5259 		trace_printk_control(enabled);
5260 	}
5261 
5262 	return 0;
5263 }
5264 
trace_set_options(struct trace_array * tr,char * option)5265 int trace_set_options(struct trace_array *tr, char *option)
5266 {
5267 	char *cmp;
5268 	int neg = 0;
5269 	int ret;
5270 	size_t orig_len = strlen(option);
5271 	int len;
5272 
5273 	cmp = strstrip(option);
5274 
5275 	len = str_has_prefix(cmp, "no");
5276 	if (len)
5277 		neg = 1;
5278 
5279 	cmp += len;
5280 
5281 	mutex_lock(&event_mutex);
5282 	mutex_lock(&trace_types_lock);
5283 
5284 	ret = match_string(trace_options, -1, cmp);
5285 	/* If no option could be set, test the specific tracer options */
5286 	if (ret < 0)
5287 		ret = set_tracer_option(tr, cmp, neg);
5288 	else
5289 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5290 
5291 	mutex_unlock(&trace_types_lock);
5292 	mutex_unlock(&event_mutex);
5293 
5294 	/*
5295 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5296 	 * turn it back into a space.
5297 	 */
5298 	if (orig_len > strlen(option))
5299 		option[strlen(option)] = ' ';
5300 
5301 	return ret;
5302 }
5303 
apply_trace_boot_options(void)5304 static void __init apply_trace_boot_options(void)
5305 {
5306 	char *buf = trace_boot_options_buf;
5307 	char *option;
5308 
5309 	while (true) {
5310 		option = strsep(&buf, ",");
5311 
5312 		if (!option)
5313 			break;
5314 
5315 		if (*option)
5316 			trace_set_options(&global_trace, option);
5317 
5318 		/* Put back the comma to allow this to be called again */
5319 		if (buf)
5320 			*(buf - 1) = ',';
5321 	}
5322 }
5323 
5324 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5325 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5326 			size_t cnt, loff_t *ppos)
5327 {
5328 	struct seq_file *m = filp->private_data;
5329 	struct trace_array *tr = m->private;
5330 	char buf[64];
5331 	int ret;
5332 
5333 	if (cnt >= sizeof(buf))
5334 		return -EINVAL;
5335 
5336 	if (copy_from_user(buf, ubuf, cnt))
5337 		return -EFAULT;
5338 
5339 	buf[cnt] = 0;
5340 
5341 	ret = trace_set_options(tr, buf);
5342 	if (ret < 0)
5343 		return ret;
5344 
5345 	*ppos += cnt;
5346 
5347 	return cnt;
5348 }
5349 
tracing_trace_options_open(struct inode * inode,struct file * file)5350 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5351 {
5352 	struct trace_array *tr = inode->i_private;
5353 	int ret;
5354 
5355 	ret = tracing_check_open_get_tr(tr);
5356 	if (ret)
5357 		return ret;
5358 
5359 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5360 	if (ret < 0)
5361 		trace_array_put(tr);
5362 
5363 	return ret;
5364 }
5365 
5366 static const struct file_operations tracing_iter_fops = {
5367 	.open		= tracing_trace_options_open,
5368 	.read		= seq_read,
5369 	.llseek		= seq_lseek,
5370 	.release	= tracing_single_release_tr,
5371 	.write		= tracing_trace_options_write,
5372 };
5373 
5374 static const char readme_msg[] =
5375 	"tracing mini-HOWTO:\n\n"
5376 	"By default tracefs removes all OTH file permission bits.\n"
5377 	"When mounting tracefs an optional group id can be specified\n"
5378 	"which adds the group to every directory and file in tracefs:\n\n"
5379 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5380 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5381 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5382 	" Important files:\n"
5383 	"  trace\t\t\t- The static contents of the buffer\n"
5384 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5385 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5386 	"  current_tracer\t- function and latency tracers\n"
5387 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5388 	"  error_log\t- error log for failed commands (that support it)\n"
5389 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5390 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5391 	"  trace_clock\t\t- change the clock used to order events\n"
5392 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5393 	"      global:   Synced across CPUs but slows tracing down.\n"
5394 	"     counter:   Not a clock, but just an increment\n"
5395 	"      uptime:   Jiffy counter from time of boot\n"
5396 	"        perf:   Same clock that perf events use\n"
5397 #ifdef CONFIG_X86_64
5398 	"     x86-tsc:   TSC cycle counter\n"
5399 #endif
5400 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5401 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5402 	"    absolute:   Absolute (standalone) timestamp\n"
5403 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5404 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5405 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5406 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5407 	"\t\t\t  Remove sub-buffer with rmdir\n"
5408 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5409 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5410 	"\t\t\t  option name\n"
5411 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5412 #ifdef CONFIG_DYNAMIC_FTRACE
5413 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5414 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5415 	"\t\t\t  functions\n"
5416 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5417 	"\t     modules: Can select a group via module\n"
5418 	"\t      Format: :mod:<module-name>\n"
5419 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5420 	"\t    triggers: a command to perform when function is hit\n"
5421 	"\t      Format: <function>:<trigger>[:count]\n"
5422 	"\t     trigger: traceon, traceoff\n"
5423 	"\t\t      enable_event:<system>:<event>\n"
5424 	"\t\t      disable_event:<system>:<event>\n"
5425 #ifdef CONFIG_STACKTRACE
5426 	"\t\t      stacktrace\n"
5427 #endif
5428 #ifdef CONFIG_TRACER_SNAPSHOT
5429 	"\t\t      snapshot\n"
5430 #endif
5431 	"\t\t      dump\n"
5432 	"\t\t      cpudump\n"
5433 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5434 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5435 	"\t     The first one will disable tracing every time do_fault is hit\n"
5436 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5437 	"\t       The first time do trap is hit and it disables tracing, the\n"
5438 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5439 	"\t       the counter will not decrement. It only decrements when the\n"
5440 	"\t       trigger did work\n"
5441 	"\t     To remove trigger without count:\n"
5442 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5443 	"\t     To remove trigger with a count:\n"
5444 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5445 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5446 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5447 	"\t    modules: Can select a group via module command :mod:\n"
5448 	"\t    Does not accept triggers\n"
5449 #endif /* CONFIG_DYNAMIC_FTRACE */
5450 #ifdef CONFIG_FUNCTION_TRACER
5451 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5452 	"\t\t    (function)\n"
5453 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5454 	"\t\t    (function)\n"
5455 #endif
5456 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5457 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5458 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5459 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5460 #endif
5461 #ifdef CONFIG_TRACER_SNAPSHOT
5462 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5463 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5464 	"\t\t\t  information\n"
5465 #endif
5466 #ifdef CONFIG_STACK_TRACER
5467 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5468 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5469 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5470 	"\t\t\t  new trace)\n"
5471 #ifdef CONFIG_DYNAMIC_FTRACE
5472 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5473 	"\t\t\t  traces\n"
5474 #endif
5475 #endif /* CONFIG_STACK_TRACER */
5476 #ifdef CONFIG_DYNAMIC_EVENTS
5477 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5478 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5479 #endif
5480 #ifdef CONFIG_KPROBE_EVENTS
5481 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5482 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5483 #endif
5484 #ifdef CONFIG_UPROBE_EVENTS
5485 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5486 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5487 #endif
5488 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5489     defined(CONFIG_FPROBE_EVENTS)
5490 	"\t  accepts: event-definitions (one definition per line)\n"
5491 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5492 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5493 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5494 #endif
5495 #ifdef CONFIG_FPROBE_EVENTS
5496 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5497 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5498 #endif
5499 #ifdef CONFIG_HIST_TRIGGERS
5500 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5501 #endif
5502 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5503 	"\t           -:[<group>/][<event>]\n"
5504 #ifdef CONFIG_KPROBE_EVENTS
5505 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5506   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5507 #endif
5508 #ifdef CONFIG_UPROBE_EVENTS
5509   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5510 #endif
5511 	"\t     args: <name>=fetcharg[:type]\n"
5512 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5513 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5514 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5515 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5516 	"\t           <argname>[->field[->field|.field...]],\n"
5517 #endif
5518 #else
5519 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5520 #endif
5521 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5522 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5523 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5524 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5525 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5526 #ifdef CONFIG_HIST_TRIGGERS
5527 	"\t    field: <stype> <name>;\n"
5528 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5529 	"\t           [unsigned] char/int/long\n"
5530 #endif
5531 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5532 	"\t            of the <attached-group>/<attached-event>.\n"
5533 #endif
5534 	"  set_event\t\t- Enables events by name written into it\n"
5535 	"\t\t\t  Can enable module events via: :mod:<module>\n"
5536 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5537 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5538 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5539 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5540 	"\t\t\t  events\n"
5541 	"      filter\t\t- If set, only events passing filter are traced\n"
5542 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5543 	"\t\t\t  <event>:\n"
5544 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5545 	"      filter\t\t- If set, only events passing filter are traced\n"
5546 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5547 	"\t    Format: <trigger>[:count][if <filter>]\n"
5548 	"\t   trigger: traceon, traceoff\n"
5549 	"\t            enable_event:<system>:<event>\n"
5550 	"\t            disable_event:<system>:<event>\n"
5551 #ifdef CONFIG_HIST_TRIGGERS
5552 	"\t            enable_hist:<system>:<event>\n"
5553 	"\t            disable_hist:<system>:<event>\n"
5554 #endif
5555 #ifdef CONFIG_STACKTRACE
5556 	"\t\t    stacktrace\n"
5557 #endif
5558 #ifdef CONFIG_TRACER_SNAPSHOT
5559 	"\t\t    snapshot\n"
5560 #endif
5561 #ifdef CONFIG_HIST_TRIGGERS
5562 	"\t\t    hist (see below)\n"
5563 #endif
5564 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5565 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5566 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5567 	"\t                  events/block/block_unplug/trigger\n"
5568 	"\t   The first disables tracing every time block_unplug is hit.\n"
5569 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5570 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5571 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5572 	"\t   Like function triggers, the counter is only decremented if it\n"
5573 	"\t    enabled or disabled tracing.\n"
5574 	"\t   To remove a trigger without a count:\n"
5575 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5576 	"\t   To remove a trigger with a count:\n"
5577 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5578 	"\t   Filters can be ignored when removing a trigger.\n"
5579 #ifdef CONFIG_HIST_TRIGGERS
5580 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5581 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5582 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5583 	"\t            [:values=<field1[,field2,...]>]\n"
5584 	"\t            [:sort=<field1[,field2,...]>]\n"
5585 	"\t            [:size=#entries]\n"
5586 	"\t            [:pause][:continue][:clear]\n"
5587 	"\t            [:name=histname1]\n"
5588 	"\t            [:nohitcount]\n"
5589 	"\t            [:<handler>.<action>]\n"
5590 	"\t            [if <filter>]\n\n"
5591 	"\t    Note, special fields can be used as well:\n"
5592 	"\t            common_timestamp - to record current timestamp\n"
5593 	"\t            common_cpu - to record the CPU the event happened on\n"
5594 	"\n"
5595 	"\t    A hist trigger variable can be:\n"
5596 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5597 	"\t        - a reference to another variable e.g. y=$x,\n"
5598 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5599 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5600 	"\n"
5601 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5602 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5603 	"\t    variable reference, field or numeric literal.\n"
5604 	"\n"
5605 	"\t    When a matching event is hit, an entry is added to a hash\n"
5606 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5607 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5608 	"\t    correspond to fields in the event's format description.  Keys\n"
5609 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5610 	"\t    Compound keys consisting of up to two fields can be specified\n"
5611 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5612 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5613 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5614 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5615 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5616 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5617 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5618 	"\t    its histogram data will be shared with other triggers of the\n"
5619 	"\t    same name, and trigger hits will update this common data.\n\n"
5620 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5621 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5622 	"\t    triggers attached to an event, there will be a table for each\n"
5623 	"\t    trigger in the output.  The table displayed for a named\n"
5624 	"\t    trigger will be the same as any other instance having the\n"
5625 	"\t    same name.  The default format used to display a given field\n"
5626 	"\t    can be modified by appending any of the following modifiers\n"
5627 	"\t    to the field name, as applicable:\n\n"
5628 	"\t            .hex        display a number as a hex value\n"
5629 	"\t            .sym        display an address as a symbol\n"
5630 	"\t            .sym-offset display an address as a symbol and offset\n"
5631 	"\t            .execname   display a common_pid as a program name\n"
5632 	"\t            .syscall    display a syscall id as a syscall name\n"
5633 	"\t            .log2       display log2 value rather than raw number\n"
5634 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5635 	"\t            .usecs      display a common_timestamp in microseconds\n"
5636 	"\t            .percent    display a number of percentage value\n"
5637 	"\t            .graph      display a bar-graph of a value\n\n"
5638 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5639 	"\t    trigger or to start a hist trigger but not log any events\n"
5640 	"\t    until told to do so.  'continue' can be used to start or\n"
5641 	"\t    restart a paused hist trigger.\n\n"
5642 	"\t    The 'clear' parameter will clear the contents of a running\n"
5643 	"\t    hist trigger and leave its current paused/active state\n"
5644 	"\t    unchanged.\n\n"
5645 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5646 	"\t    raw hitcount in the histogram.\n\n"
5647 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5648 	"\t    have one event conditionally start and stop another event's\n"
5649 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5650 	"\t    the enable_event and disable_event triggers.\n\n"
5651 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5652 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5653 	"\t        <handler>.<action>\n\n"
5654 	"\t    The available handlers are:\n\n"
5655 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5656 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5657 	"\t        onchange(var)            - invoke action if var changes\n\n"
5658 	"\t    The available actions are:\n\n"
5659 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5660 	"\t        save(field,...)                      - save current event fields\n"
5661 #ifdef CONFIG_TRACER_SNAPSHOT
5662 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5663 #endif
5664 #ifdef CONFIG_SYNTH_EVENTS
5665 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5666 	"\t  Write into this file to define/undefine new synthetic events.\n"
5667 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5668 #endif
5669 #endif
5670 ;
5671 
5672 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5673 tracing_readme_read(struct file *filp, char __user *ubuf,
5674 		       size_t cnt, loff_t *ppos)
5675 {
5676 	return simple_read_from_buffer(ubuf, cnt, ppos,
5677 					readme_msg, strlen(readme_msg));
5678 }
5679 
5680 static const struct file_operations tracing_readme_fops = {
5681 	.open		= tracing_open_generic,
5682 	.read		= tracing_readme_read,
5683 	.llseek		= generic_file_llseek,
5684 };
5685 
5686 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5687 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)5688 update_eval_map(union trace_eval_map_item *ptr)
5689 {
5690 	if (!ptr->map.eval_string) {
5691 		if (ptr->tail.next) {
5692 			ptr = ptr->tail.next;
5693 			/* Set ptr to the next real item (skip head) */
5694 			ptr++;
5695 		} else
5696 			return NULL;
5697 	}
5698 	return ptr;
5699 }
5700 
eval_map_next(struct seq_file * m,void * v,loff_t * pos)5701 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5702 {
5703 	union trace_eval_map_item *ptr = v;
5704 
5705 	/*
5706 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5707 	 * This really should never happen.
5708 	 */
5709 	(*pos)++;
5710 	ptr = update_eval_map(ptr);
5711 	if (WARN_ON_ONCE(!ptr))
5712 		return NULL;
5713 
5714 	ptr++;
5715 	ptr = update_eval_map(ptr);
5716 
5717 	return ptr;
5718 }
5719 
eval_map_start(struct seq_file * m,loff_t * pos)5720 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5721 {
5722 	union trace_eval_map_item *v;
5723 	loff_t l = 0;
5724 
5725 	mutex_lock(&trace_eval_mutex);
5726 
5727 	v = trace_eval_maps;
5728 	if (v)
5729 		v++;
5730 
5731 	while (v && l < *pos) {
5732 		v = eval_map_next(m, v, &l);
5733 	}
5734 
5735 	return v;
5736 }
5737 
eval_map_stop(struct seq_file * m,void * v)5738 static void eval_map_stop(struct seq_file *m, void *v)
5739 {
5740 	mutex_unlock(&trace_eval_mutex);
5741 }
5742 
eval_map_show(struct seq_file * m,void * v)5743 static int eval_map_show(struct seq_file *m, void *v)
5744 {
5745 	union trace_eval_map_item *ptr = v;
5746 
5747 	seq_printf(m, "%s %ld (%s)\n",
5748 		   ptr->map.eval_string, ptr->map.eval_value,
5749 		   ptr->map.system);
5750 
5751 	return 0;
5752 }
5753 
5754 static const struct seq_operations tracing_eval_map_seq_ops = {
5755 	.start		= eval_map_start,
5756 	.next		= eval_map_next,
5757 	.stop		= eval_map_stop,
5758 	.show		= eval_map_show,
5759 };
5760 
tracing_eval_map_open(struct inode * inode,struct file * filp)5761 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5762 {
5763 	int ret;
5764 
5765 	ret = tracing_check_open_get_tr(NULL);
5766 	if (ret)
5767 		return ret;
5768 
5769 	return seq_open(filp, &tracing_eval_map_seq_ops);
5770 }
5771 
5772 static const struct file_operations tracing_eval_map_fops = {
5773 	.open		= tracing_eval_map_open,
5774 	.read		= seq_read,
5775 	.llseek		= seq_lseek,
5776 	.release	= seq_release,
5777 };
5778 
5779 static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)5780 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5781 {
5782 	/* Return tail of array given the head */
5783 	return ptr + ptr->head.length + 1;
5784 }
5785 
5786 static void
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5787 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5788 			   int len)
5789 {
5790 	struct trace_eval_map **stop;
5791 	struct trace_eval_map **map;
5792 	union trace_eval_map_item *map_array;
5793 	union trace_eval_map_item *ptr;
5794 
5795 	stop = start + len;
5796 
5797 	/*
5798 	 * The trace_eval_maps contains the map plus a head and tail item,
5799 	 * where the head holds the module and length of array, and the
5800 	 * tail holds a pointer to the next list.
5801 	 */
5802 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5803 	if (!map_array) {
5804 		pr_warn("Unable to allocate trace eval mapping\n");
5805 		return;
5806 	}
5807 
5808 	guard(mutex)(&trace_eval_mutex);
5809 
5810 	if (!trace_eval_maps)
5811 		trace_eval_maps = map_array;
5812 	else {
5813 		ptr = trace_eval_maps;
5814 		for (;;) {
5815 			ptr = trace_eval_jmp_to_tail(ptr);
5816 			if (!ptr->tail.next)
5817 				break;
5818 			ptr = ptr->tail.next;
5819 
5820 		}
5821 		ptr->tail.next = map_array;
5822 	}
5823 	map_array->head.mod = mod;
5824 	map_array->head.length = len;
5825 	map_array++;
5826 
5827 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5828 		map_array->map = **map;
5829 		map_array++;
5830 	}
5831 	memset(map_array, 0, sizeof(*map_array));
5832 }
5833 
trace_create_eval_file(struct dentry * d_tracer)5834 static void trace_create_eval_file(struct dentry *d_tracer)
5835 {
5836 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5837 			  NULL, &tracing_eval_map_fops);
5838 }
5839 
5840 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
trace_create_eval_file(struct dentry * d_tracer)5841 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5842 static inline void trace_insert_eval_map_file(struct module *mod,
5843 			      struct trace_eval_map **start, int len) { }
5844 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5845 
trace_insert_eval_map(struct module * mod,struct trace_eval_map ** start,int len)5846 static void trace_insert_eval_map(struct module *mod,
5847 				  struct trace_eval_map **start, int len)
5848 {
5849 	struct trace_eval_map **map;
5850 
5851 	if (len <= 0)
5852 		return;
5853 
5854 	map = start;
5855 
5856 	trace_event_eval_update(map, len);
5857 
5858 	trace_insert_eval_map_file(mod, start, len);
5859 }
5860 
5861 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5862 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5863 		       size_t cnt, loff_t *ppos)
5864 {
5865 	struct trace_array *tr = filp->private_data;
5866 	char buf[MAX_TRACER_SIZE+2];
5867 	int r;
5868 
5869 	mutex_lock(&trace_types_lock);
5870 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5871 	mutex_unlock(&trace_types_lock);
5872 
5873 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5874 }
5875 
tracer_init(struct tracer * t,struct trace_array * tr)5876 int tracer_init(struct tracer *t, struct trace_array *tr)
5877 {
5878 	tracing_reset_online_cpus(&tr->array_buffer);
5879 	return t->init(tr);
5880 }
5881 
set_buffer_entries(struct array_buffer * buf,unsigned long val)5882 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5883 {
5884 	int cpu;
5885 
5886 	for_each_tracing_cpu(cpu)
5887 		per_cpu_ptr(buf->data, cpu)->entries = val;
5888 }
5889 
update_buffer_entries(struct array_buffer * buf,int cpu)5890 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5891 {
5892 	if (cpu == RING_BUFFER_ALL_CPUS) {
5893 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5894 	} else {
5895 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5896 	}
5897 }
5898 
5899 #ifdef CONFIG_TRACER_MAX_TRACE
5900 /* resize @tr's buffer to the size of @size_tr's entries */
resize_buffer_duplicate_size(struct array_buffer * trace_buf,struct array_buffer * size_buf,int cpu_id)5901 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5902 					struct array_buffer *size_buf, int cpu_id)
5903 {
5904 	int cpu, ret = 0;
5905 
5906 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5907 		for_each_tracing_cpu(cpu) {
5908 			ret = ring_buffer_resize(trace_buf->buffer,
5909 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5910 			if (ret < 0)
5911 				break;
5912 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5913 				per_cpu_ptr(size_buf->data, cpu)->entries;
5914 		}
5915 	} else {
5916 		ret = ring_buffer_resize(trace_buf->buffer,
5917 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5918 		if (ret == 0)
5919 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5920 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5921 	}
5922 
5923 	return ret;
5924 }
5925 #endif /* CONFIG_TRACER_MAX_TRACE */
5926 
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)5927 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5928 					unsigned long size, int cpu)
5929 {
5930 	int ret;
5931 
5932 	/*
5933 	 * If kernel or user changes the size of the ring buffer
5934 	 * we use the size that was given, and we can forget about
5935 	 * expanding it later.
5936 	 */
5937 	trace_set_ring_buffer_expanded(tr);
5938 
5939 	/* May be called before buffers are initialized */
5940 	if (!tr->array_buffer.buffer)
5941 		return 0;
5942 
5943 	/* Do not allow tracing while resizing ring buffer */
5944 	tracing_stop_tr(tr);
5945 
5946 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5947 	if (ret < 0)
5948 		goto out_start;
5949 
5950 #ifdef CONFIG_TRACER_MAX_TRACE
5951 	if (!tr->allocated_snapshot)
5952 		goto out;
5953 
5954 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5955 	if (ret < 0) {
5956 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5957 						     &tr->array_buffer, cpu);
5958 		if (r < 0) {
5959 			/*
5960 			 * AARGH! We are left with different
5961 			 * size max buffer!!!!
5962 			 * The max buffer is our "snapshot" buffer.
5963 			 * When a tracer needs a snapshot (one of the
5964 			 * latency tracers), it swaps the max buffer
5965 			 * with the saved snap shot. We succeeded to
5966 			 * update the size of the main buffer, but failed to
5967 			 * update the size of the max buffer. But when we tried
5968 			 * to reset the main buffer to the original size, we
5969 			 * failed there too. This is very unlikely to
5970 			 * happen, but if it does, warn and kill all
5971 			 * tracing.
5972 			 */
5973 			WARN_ON(1);
5974 			tracing_disabled = 1;
5975 		}
5976 		goto out_start;
5977 	}
5978 
5979 	update_buffer_entries(&tr->max_buffer, cpu);
5980 
5981  out:
5982 #endif /* CONFIG_TRACER_MAX_TRACE */
5983 
5984 	update_buffer_entries(&tr->array_buffer, cpu);
5985  out_start:
5986 	tracing_start_tr(tr);
5987 	return ret;
5988 }
5989 
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)5990 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5991 				  unsigned long size, int cpu_id)
5992 {
5993 	guard(mutex)(&trace_types_lock);
5994 
5995 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
5996 		/* make sure, this cpu is enabled in the mask */
5997 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask))
5998 			return -EINVAL;
5999 	}
6000 
6001 	return __tracing_resize_ring_buffer(tr, size, cpu_id);
6002 }
6003 
update_last_data(struct trace_array * tr)6004 static void update_last_data(struct trace_array *tr)
6005 {
6006 	if (!tr->text_delta && !tr->data_delta)
6007 		return;
6008 
6009 	/*
6010 	 * Need to clear all CPU buffers as there cannot be events
6011 	 * from the previous boot mixed with events with this boot
6012 	 * as that will cause a confusing trace. Need to clear all
6013 	 * CPU buffers, even for those that may currently be offline.
6014 	 */
6015 	tracing_reset_all_cpus(&tr->array_buffer);
6016 
6017 	/* Using current data now */
6018 	tr->text_delta = 0;
6019 	tr->data_delta = 0;
6020 }
6021 
6022 /**
6023  * tracing_update_buffers - used by tracing facility to expand ring buffers
6024  * @tr: The tracing instance
6025  *
6026  * To save on memory when the tracing is never used on a system with it
6027  * configured in. The ring buffers are set to a minimum size. But once
6028  * a user starts to use the tracing facility, then they need to grow
6029  * to their default size.
6030  *
6031  * This function is to be called when a tracer is about to be used.
6032  */
tracing_update_buffers(struct trace_array * tr)6033 int tracing_update_buffers(struct trace_array *tr)
6034 {
6035 	int ret = 0;
6036 
6037 	mutex_lock(&trace_types_lock);
6038 
6039 	update_last_data(tr);
6040 
6041 	if (!tr->ring_buffer_expanded)
6042 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6043 						RING_BUFFER_ALL_CPUS);
6044 	mutex_unlock(&trace_types_lock);
6045 
6046 	return ret;
6047 }
6048 
6049 struct trace_option_dentry;
6050 
6051 static void
6052 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6053 
6054 /*
6055  * Used to clear out the tracer before deletion of an instance.
6056  * Must have trace_types_lock held.
6057  */
tracing_set_nop(struct trace_array * tr)6058 static void tracing_set_nop(struct trace_array *tr)
6059 {
6060 	if (tr->current_trace == &nop_trace)
6061 		return;
6062 
6063 	tr->current_trace->enabled--;
6064 
6065 	if (tr->current_trace->reset)
6066 		tr->current_trace->reset(tr);
6067 
6068 	tr->current_trace = &nop_trace;
6069 }
6070 
6071 static bool tracer_options_updated;
6072 
add_tracer_options(struct trace_array * tr,struct tracer * t)6073 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6074 {
6075 	/* Only enable if the directory has been created already. */
6076 	if (!tr->dir)
6077 		return;
6078 
6079 	/* Only create trace option files after update_tracer_options finish */
6080 	if (!tracer_options_updated)
6081 		return;
6082 
6083 	create_trace_option_files(tr, t);
6084 }
6085 
tracing_set_tracer(struct trace_array * tr,const char * buf)6086 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6087 {
6088 	struct tracer *t;
6089 #ifdef CONFIG_TRACER_MAX_TRACE
6090 	bool had_max_tr;
6091 #endif
6092 	int ret;
6093 
6094 	guard(mutex)(&trace_types_lock);
6095 
6096 	update_last_data(tr);
6097 
6098 	if (!tr->ring_buffer_expanded) {
6099 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6100 						RING_BUFFER_ALL_CPUS);
6101 		if (ret < 0)
6102 			return ret;
6103 		ret = 0;
6104 	}
6105 
6106 	for (t = trace_types; t; t = t->next) {
6107 		if (strcmp(t->name, buf) == 0)
6108 			break;
6109 	}
6110 	if (!t)
6111 		return -EINVAL;
6112 
6113 	if (t == tr->current_trace)
6114 		return 0;
6115 
6116 #ifdef CONFIG_TRACER_SNAPSHOT
6117 	if (t->use_max_tr) {
6118 		local_irq_disable();
6119 		arch_spin_lock(&tr->max_lock);
6120 		ret = tr->cond_snapshot ? -EBUSY : 0;
6121 		arch_spin_unlock(&tr->max_lock);
6122 		local_irq_enable();
6123 		if (ret)
6124 			return ret;
6125 	}
6126 #endif
6127 	/* Some tracers won't work on kernel command line */
6128 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6129 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6130 			t->name);
6131 		return -EINVAL;
6132 	}
6133 
6134 	/* Some tracers are only allowed for the top level buffer */
6135 	if (!trace_ok_for_array(t, tr))
6136 		return -EINVAL;
6137 
6138 	/* If trace pipe files are being read, we can't change the tracer */
6139 	if (tr->trace_ref)
6140 		return -EBUSY;
6141 
6142 	trace_branch_disable();
6143 
6144 	tr->current_trace->enabled--;
6145 
6146 	if (tr->current_trace->reset)
6147 		tr->current_trace->reset(tr);
6148 
6149 #ifdef CONFIG_TRACER_MAX_TRACE
6150 	had_max_tr = tr->current_trace->use_max_tr;
6151 
6152 	/* Current trace needs to be nop_trace before synchronize_rcu */
6153 	tr->current_trace = &nop_trace;
6154 
6155 	if (had_max_tr && !t->use_max_tr) {
6156 		/*
6157 		 * We need to make sure that the update_max_tr sees that
6158 		 * current_trace changed to nop_trace to keep it from
6159 		 * swapping the buffers after we resize it.
6160 		 * The update_max_tr is called from interrupts disabled
6161 		 * so a synchronized_sched() is sufficient.
6162 		 */
6163 		synchronize_rcu();
6164 		free_snapshot(tr);
6165 		tracing_disarm_snapshot(tr);
6166 	}
6167 
6168 	if (!had_max_tr && t->use_max_tr) {
6169 		ret = tracing_arm_snapshot_locked(tr);
6170 		if (ret)
6171 			return ret;
6172 	}
6173 #else
6174 	tr->current_trace = &nop_trace;
6175 #endif
6176 
6177 	if (t->init) {
6178 		ret = tracer_init(t, tr);
6179 		if (ret) {
6180 #ifdef CONFIG_TRACER_MAX_TRACE
6181 			if (t->use_max_tr)
6182 				tracing_disarm_snapshot(tr);
6183 #endif
6184 			return ret;
6185 		}
6186 	}
6187 
6188 	tr->current_trace = t;
6189 	tr->current_trace->enabled++;
6190 	trace_branch_enable(tr);
6191 
6192 	return 0;
6193 }
6194 
6195 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6196 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6197 			size_t cnt, loff_t *ppos)
6198 {
6199 	struct trace_array *tr = filp->private_data;
6200 	char buf[MAX_TRACER_SIZE+1];
6201 	char *name;
6202 	size_t ret;
6203 	int err;
6204 
6205 	ret = cnt;
6206 
6207 	if (cnt > MAX_TRACER_SIZE)
6208 		cnt = MAX_TRACER_SIZE;
6209 
6210 	if (copy_from_user(buf, ubuf, cnt))
6211 		return -EFAULT;
6212 
6213 	buf[cnt] = 0;
6214 
6215 	name = strim(buf);
6216 
6217 	err = tracing_set_tracer(tr, name);
6218 	if (err)
6219 		return err;
6220 
6221 	*ppos += ret;
6222 
6223 	return ret;
6224 }
6225 
6226 static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)6227 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6228 		   size_t cnt, loff_t *ppos)
6229 {
6230 	char buf[64];
6231 	int r;
6232 
6233 	r = snprintf(buf, sizeof(buf), "%ld\n",
6234 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6235 	if (r > sizeof(buf))
6236 		r = sizeof(buf);
6237 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6238 }
6239 
6240 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)6241 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6242 		    size_t cnt, loff_t *ppos)
6243 {
6244 	unsigned long val;
6245 	int ret;
6246 
6247 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6248 	if (ret)
6249 		return ret;
6250 
6251 	*ptr = val * 1000;
6252 
6253 	return cnt;
6254 }
6255 
6256 static ssize_t
tracing_thresh_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6257 tracing_thresh_read(struct file *filp, char __user *ubuf,
6258 		    size_t cnt, loff_t *ppos)
6259 {
6260 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6261 }
6262 
6263 static ssize_t
tracing_thresh_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6264 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6265 		     size_t cnt, loff_t *ppos)
6266 {
6267 	struct trace_array *tr = filp->private_data;
6268 	int ret;
6269 
6270 	guard(mutex)(&trace_types_lock);
6271 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6272 	if (ret < 0)
6273 		return ret;
6274 
6275 	if (tr->current_trace->update_thresh) {
6276 		ret = tr->current_trace->update_thresh(tr);
6277 		if (ret < 0)
6278 			return ret;
6279 	}
6280 
6281 	return cnt;
6282 }
6283 
6284 #ifdef CONFIG_TRACER_MAX_TRACE
6285 
6286 static ssize_t
tracing_max_lat_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6287 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6288 		     size_t cnt, loff_t *ppos)
6289 {
6290 	struct trace_array *tr = filp->private_data;
6291 
6292 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6293 }
6294 
6295 static ssize_t
tracing_max_lat_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6296 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6297 		      size_t cnt, loff_t *ppos)
6298 {
6299 	struct trace_array *tr = filp->private_data;
6300 
6301 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6302 }
6303 
6304 #endif
6305 
open_pipe_on_cpu(struct trace_array * tr,int cpu)6306 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6307 {
6308 	if (cpu == RING_BUFFER_ALL_CPUS) {
6309 		if (cpumask_empty(tr->pipe_cpumask)) {
6310 			cpumask_setall(tr->pipe_cpumask);
6311 			return 0;
6312 		}
6313 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6314 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6315 		return 0;
6316 	}
6317 	return -EBUSY;
6318 }
6319 
close_pipe_on_cpu(struct trace_array * tr,int cpu)6320 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6321 {
6322 	if (cpu == RING_BUFFER_ALL_CPUS) {
6323 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6324 		cpumask_clear(tr->pipe_cpumask);
6325 	} else {
6326 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6327 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6328 	}
6329 }
6330 
tracing_open_pipe(struct inode * inode,struct file * filp)6331 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6332 {
6333 	struct trace_array *tr = inode->i_private;
6334 	struct trace_iterator *iter;
6335 	int cpu;
6336 	int ret;
6337 
6338 	ret = tracing_check_open_get_tr(tr);
6339 	if (ret)
6340 		return ret;
6341 
6342 	mutex_lock(&trace_types_lock);
6343 	cpu = tracing_get_cpu(inode);
6344 	ret = open_pipe_on_cpu(tr, cpu);
6345 	if (ret)
6346 		goto fail_pipe_on_cpu;
6347 
6348 	/* create a buffer to store the information to pass to userspace */
6349 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6350 	if (!iter) {
6351 		ret = -ENOMEM;
6352 		goto fail_alloc_iter;
6353 	}
6354 
6355 	trace_seq_init(&iter->seq);
6356 	iter->trace = tr->current_trace;
6357 
6358 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6359 		ret = -ENOMEM;
6360 		goto fail;
6361 	}
6362 
6363 	/* trace pipe does not show start of buffer */
6364 	cpumask_setall(iter->started);
6365 
6366 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6367 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6368 
6369 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6370 	if (trace_clocks[tr->clock_id].in_ns)
6371 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6372 
6373 	iter->tr = tr;
6374 	iter->array_buffer = &tr->array_buffer;
6375 	iter->cpu_file = cpu;
6376 	mutex_init(&iter->mutex);
6377 	filp->private_data = iter;
6378 
6379 	if (iter->trace->pipe_open)
6380 		iter->trace->pipe_open(iter);
6381 
6382 	nonseekable_open(inode, filp);
6383 
6384 	tr->trace_ref++;
6385 
6386 	mutex_unlock(&trace_types_lock);
6387 	return ret;
6388 
6389 fail:
6390 	kfree(iter);
6391 fail_alloc_iter:
6392 	close_pipe_on_cpu(tr, cpu);
6393 fail_pipe_on_cpu:
6394 	__trace_array_put(tr);
6395 	mutex_unlock(&trace_types_lock);
6396 	return ret;
6397 }
6398 
tracing_release_pipe(struct inode * inode,struct file * file)6399 static int tracing_release_pipe(struct inode *inode, struct file *file)
6400 {
6401 	struct trace_iterator *iter = file->private_data;
6402 	struct trace_array *tr = inode->i_private;
6403 
6404 	mutex_lock(&trace_types_lock);
6405 
6406 	tr->trace_ref--;
6407 
6408 	if (iter->trace->pipe_close)
6409 		iter->trace->pipe_close(iter);
6410 	close_pipe_on_cpu(tr, iter->cpu_file);
6411 	mutex_unlock(&trace_types_lock);
6412 
6413 	free_trace_iter_content(iter);
6414 	kfree(iter);
6415 
6416 	trace_array_put(tr);
6417 
6418 	return 0;
6419 }
6420 
6421 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)6422 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6423 {
6424 	struct trace_array *tr = iter->tr;
6425 
6426 	/* Iterators are static, they should be filled or empty */
6427 	if (trace_buffer_iter(iter, iter->cpu_file))
6428 		return EPOLLIN | EPOLLRDNORM;
6429 
6430 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6431 		/*
6432 		 * Always select as readable when in blocking mode
6433 		 */
6434 		return EPOLLIN | EPOLLRDNORM;
6435 	else
6436 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6437 					     filp, poll_table, iter->tr->buffer_percent);
6438 }
6439 
6440 static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)6441 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6442 {
6443 	struct trace_iterator *iter = filp->private_data;
6444 
6445 	return trace_poll(iter, filp, poll_table);
6446 }
6447 
6448 /* Must be called with iter->mutex held. */
tracing_wait_pipe(struct file * filp)6449 static int tracing_wait_pipe(struct file *filp)
6450 {
6451 	struct trace_iterator *iter = filp->private_data;
6452 	int ret;
6453 
6454 	while (trace_empty(iter)) {
6455 
6456 		if ((filp->f_flags & O_NONBLOCK)) {
6457 			return -EAGAIN;
6458 		}
6459 
6460 		/*
6461 		 * We block until we read something and tracing is disabled.
6462 		 * We still block if tracing is disabled, but we have never
6463 		 * read anything. This allows a user to cat this file, and
6464 		 * then enable tracing. But after we have read something,
6465 		 * we give an EOF when tracing is again disabled.
6466 		 *
6467 		 * iter->pos will be 0 if we haven't read anything.
6468 		 */
6469 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6470 			break;
6471 
6472 		mutex_unlock(&iter->mutex);
6473 
6474 		ret = wait_on_pipe(iter, 0);
6475 
6476 		mutex_lock(&iter->mutex);
6477 
6478 		if (ret)
6479 			return ret;
6480 	}
6481 
6482 	return 1;
6483 }
6484 
6485 /*
6486  * Consumer reader.
6487  */
6488 static ssize_t
tracing_read_pipe(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6489 tracing_read_pipe(struct file *filp, char __user *ubuf,
6490 		  size_t cnt, loff_t *ppos)
6491 {
6492 	struct trace_iterator *iter = filp->private_data;
6493 	ssize_t sret;
6494 
6495 	/*
6496 	 * Avoid more than one consumer on a single file descriptor
6497 	 * This is just a matter of traces coherency, the ring buffer itself
6498 	 * is protected.
6499 	 */
6500 	guard(mutex)(&iter->mutex);
6501 
6502 	/* return any leftover data */
6503 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6504 	if (sret != -EBUSY)
6505 		return sret;
6506 
6507 	trace_seq_init(&iter->seq);
6508 
6509 	if (iter->trace->read) {
6510 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6511 		if (sret)
6512 			return sret;
6513 	}
6514 
6515 waitagain:
6516 	sret = tracing_wait_pipe(filp);
6517 	if (sret <= 0)
6518 		return sret;
6519 
6520 	/* stop when tracing is finished */
6521 	if (trace_empty(iter))
6522 		return 0;
6523 
6524 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6525 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6526 
6527 	/* reset all but tr, trace, and overruns */
6528 	trace_iterator_reset(iter);
6529 	cpumask_clear(iter->started);
6530 	trace_seq_init(&iter->seq);
6531 
6532 	trace_event_read_lock();
6533 	trace_access_lock(iter->cpu_file);
6534 	while (trace_find_next_entry_inc(iter) != NULL) {
6535 		enum print_line_t ret;
6536 		int save_len = iter->seq.seq.len;
6537 
6538 		ret = print_trace_line(iter);
6539 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6540 			/*
6541 			 * If one print_trace_line() fills entire trace_seq in one shot,
6542 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6543 			 * In this case, we need to consume it, otherwise, loop will peek
6544 			 * this event next time, resulting in an infinite loop.
6545 			 */
6546 			if (save_len == 0) {
6547 				iter->seq.full = 0;
6548 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6549 				trace_consume(iter);
6550 				break;
6551 			}
6552 
6553 			/* In other cases, don't print partial lines */
6554 			iter->seq.seq.len = save_len;
6555 			break;
6556 		}
6557 		if (ret != TRACE_TYPE_NO_CONSUME)
6558 			trace_consume(iter);
6559 
6560 		if (trace_seq_used(&iter->seq) >= cnt)
6561 			break;
6562 
6563 		/*
6564 		 * Setting the full flag means we reached the trace_seq buffer
6565 		 * size and we should leave by partial output condition above.
6566 		 * One of the trace_seq_* functions is not used properly.
6567 		 */
6568 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6569 			  iter->ent->type);
6570 	}
6571 	trace_access_unlock(iter->cpu_file);
6572 	trace_event_read_unlock();
6573 
6574 	/* Now copy what we have to the user */
6575 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6576 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6577 		trace_seq_init(&iter->seq);
6578 
6579 	/*
6580 	 * If there was nothing to send to user, in spite of consuming trace
6581 	 * entries, go back to wait for more entries.
6582 	 */
6583 	if (sret == -EBUSY)
6584 		goto waitagain;
6585 
6586 	return sret;
6587 }
6588 
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6589 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6590 				     unsigned int idx)
6591 {
6592 	__free_page(spd->pages[idx]);
6593 }
6594 
6595 static size_t
tracing_fill_pipe_page(size_t rem,struct trace_iterator * iter)6596 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6597 {
6598 	size_t count;
6599 	int save_len;
6600 	int ret;
6601 
6602 	/* Seq buffer is page-sized, exactly what we need. */
6603 	for (;;) {
6604 		save_len = iter->seq.seq.len;
6605 		ret = print_trace_line(iter);
6606 
6607 		if (trace_seq_has_overflowed(&iter->seq)) {
6608 			iter->seq.seq.len = save_len;
6609 			break;
6610 		}
6611 
6612 		/*
6613 		 * This should not be hit, because it should only
6614 		 * be set if the iter->seq overflowed. But check it
6615 		 * anyway to be safe.
6616 		 */
6617 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6618 			iter->seq.seq.len = save_len;
6619 			break;
6620 		}
6621 
6622 		count = trace_seq_used(&iter->seq) - save_len;
6623 		if (rem < count) {
6624 			rem = 0;
6625 			iter->seq.seq.len = save_len;
6626 			break;
6627 		}
6628 
6629 		if (ret != TRACE_TYPE_NO_CONSUME)
6630 			trace_consume(iter);
6631 		rem -= count;
6632 		if (!trace_find_next_entry_inc(iter))	{
6633 			rem = 0;
6634 			iter->ent = NULL;
6635 			break;
6636 		}
6637 	}
6638 
6639 	return rem;
6640 }
6641 
tracing_splice_read_pipe(struct file * filp,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)6642 static ssize_t tracing_splice_read_pipe(struct file *filp,
6643 					loff_t *ppos,
6644 					struct pipe_inode_info *pipe,
6645 					size_t len,
6646 					unsigned int flags)
6647 {
6648 	struct page *pages_def[PIPE_DEF_BUFFERS];
6649 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6650 	struct trace_iterator *iter = filp->private_data;
6651 	struct splice_pipe_desc spd = {
6652 		.pages		= pages_def,
6653 		.partial	= partial_def,
6654 		.nr_pages	= 0, /* This gets updated below. */
6655 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6656 		.ops		= &default_pipe_buf_ops,
6657 		.spd_release	= tracing_spd_release_pipe,
6658 	};
6659 	ssize_t ret;
6660 	size_t rem;
6661 	unsigned int i;
6662 
6663 	if (splice_grow_spd(pipe, &spd))
6664 		return -ENOMEM;
6665 
6666 	mutex_lock(&iter->mutex);
6667 
6668 	if (iter->trace->splice_read) {
6669 		ret = iter->trace->splice_read(iter, filp,
6670 					       ppos, pipe, len, flags);
6671 		if (ret)
6672 			goto out_err;
6673 	}
6674 
6675 	ret = tracing_wait_pipe(filp);
6676 	if (ret <= 0)
6677 		goto out_err;
6678 
6679 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6680 		ret = -EFAULT;
6681 		goto out_err;
6682 	}
6683 
6684 	trace_event_read_lock();
6685 	trace_access_lock(iter->cpu_file);
6686 
6687 	/* Fill as many pages as possible. */
6688 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6689 		spd.pages[i] = alloc_page(GFP_KERNEL);
6690 		if (!spd.pages[i])
6691 			break;
6692 
6693 		rem = tracing_fill_pipe_page(rem, iter);
6694 
6695 		/* Copy the data into the page, so we can start over. */
6696 		ret = trace_seq_to_buffer(&iter->seq,
6697 					  page_address(spd.pages[i]),
6698 					  trace_seq_used(&iter->seq));
6699 		if (ret < 0) {
6700 			__free_page(spd.pages[i]);
6701 			break;
6702 		}
6703 		spd.partial[i].offset = 0;
6704 		spd.partial[i].len = trace_seq_used(&iter->seq);
6705 
6706 		trace_seq_init(&iter->seq);
6707 	}
6708 
6709 	trace_access_unlock(iter->cpu_file);
6710 	trace_event_read_unlock();
6711 	mutex_unlock(&iter->mutex);
6712 
6713 	spd.nr_pages = i;
6714 
6715 	if (i)
6716 		ret = splice_to_pipe(pipe, &spd);
6717 	else
6718 		ret = 0;
6719 out:
6720 	splice_shrink_spd(&spd);
6721 	return ret;
6722 
6723 out_err:
6724 	mutex_unlock(&iter->mutex);
6725 	goto out;
6726 }
6727 
6728 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6729 tracing_entries_read(struct file *filp, char __user *ubuf,
6730 		     size_t cnt, loff_t *ppos)
6731 {
6732 	struct inode *inode = file_inode(filp);
6733 	struct trace_array *tr = inode->i_private;
6734 	int cpu = tracing_get_cpu(inode);
6735 	char buf[64];
6736 	int r = 0;
6737 	ssize_t ret;
6738 
6739 	mutex_lock(&trace_types_lock);
6740 
6741 	if (cpu == RING_BUFFER_ALL_CPUS) {
6742 		int cpu, buf_size_same;
6743 		unsigned long size;
6744 
6745 		size = 0;
6746 		buf_size_same = 1;
6747 		/* check if all cpu sizes are same */
6748 		for_each_tracing_cpu(cpu) {
6749 			/* fill in the size from first enabled cpu */
6750 			if (size == 0)
6751 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6752 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6753 				buf_size_same = 0;
6754 				break;
6755 			}
6756 		}
6757 
6758 		if (buf_size_same) {
6759 			if (!tr->ring_buffer_expanded)
6760 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6761 					    size >> 10,
6762 					    trace_buf_size >> 10);
6763 			else
6764 				r = sprintf(buf, "%lu\n", size >> 10);
6765 		} else
6766 			r = sprintf(buf, "X\n");
6767 	} else
6768 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6769 
6770 	mutex_unlock(&trace_types_lock);
6771 
6772 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6773 	return ret;
6774 }
6775 
6776 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6777 tracing_entries_write(struct file *filp, const char __user *ubuf,
6778 		      size_t cnt, loff_t *ppos)
6779 {
6780 	struct inode *inode = file_inode(filp);
6781 	struct trace_array *tr = inode->i_private;
6782 	unsigned long val;
6783 	int ret;
6784 
6785 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6786 	if (ret)
6787 		return ret;
6788 
6789 	/* must have at least 1 entry */
6790 	if (!val)
6791 		return -EINVAL;
6792 
6793 	/* value is in KB */
6794 	val <<= 10;
6795 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6796 	if (ret < 0)
6797 		return ret;
6798 
6799 	*ppos += cnt;
6800 
6801 	return cnt;
6802 }
6803 
6804 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6805 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6806 				size_t cnt, loff_t *ppos)
6807 {
6808 	struct trace_array *tr = filp->private_data;
6809 	char buf[64];
6810 	int r, cpu;
6811 	unsigned long size = 0, expanded_size = 0;
6812 
6813 	mutex_lock(&trace_types_lock);
6814 	for_each_tracing_cpu(cpu) {
6815 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6816 		if (!tr->ring_buffer_expanded)
6817 			expanded_size += trace_buf_size >> 10;
6818 	}
6819 	if (tr->ring_buffer_expanded)
6820 		r = sprintf(buf, "%lu\n", size);
6821 	else
6822 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6823 	mutex_unlock(&trace_types_lock);
6824 
6825 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6826 }
6827 
6828 static ssize_t
tracing_last_boot_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6829 tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
6830 {
6831 	struct trace_array *tr = filp->private_data;
6832 	struct seq_buf seq;
6833 	char buf[64];
6834 
6835 	seq_buf_init(&seq, buf, 64);
6836 
6837 	seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta);
6838 	seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta);
6839 
6840 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq));
6841 }
6842 
tracing_buffer_meta_open(struct inode * inode,struct file * filp)6843 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
6844 {
6845 	struct trace_array *tr = inode->i_private;
6846 	int cpu = tracing_get_cpu(inode);
6847 	int ret;
6848 
6849 	ret = tracing_check_open_get_tr(tr);
6850 	if (ret)
6851 		return ret;
6852 
6853 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
6854 	if (ret < 0)
6855 		__trace_array_put(tr);
6856 	return ret;
6857 }
6858 
6859 static ssize_t
tracing_free_buffer_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6860 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6861 			  size_t cnt, loff_t *ppos)
6862 {
6863 	/*
6864 	 * There is no need to read what the user has written, this function
6865 	 * is just to make sure that there is no error when "echo" is used
6866 	 */
6867 
6868 	*ppos += cnt;
6869 
6870 	return cnt;
6871 }
6872 
6873 static int
tracing_free_buffer_release(struct inode * inode,struct file * filp)6874 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6875 {
6876 	struct trace_array *tr = inode->i_private;
6877 
6878 	/* disable tracing ? */
6879 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6880 		tracer_tracing_off(tr);
6881 	/* resize the ring buffer to 0 */
6882 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6883 
6884 	trace_array_put(tr);
6885 
6886 	return 0;
6887 }
6888 
6889 #define TRACE_MARKER_MAX_SIZE		4096
6890 
6891 static ssize_t
tracing_mark_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)6892 tracing_mark_write(struct file *filp, const char __user *ubuf,
6893 					size_t cnt, loff_t *fpos)
6894 {
6895 	struct trace_array *tr = filp->private_data;
6896 	struct ring_buffer_event *event;
6897 	enum event_trigger_type tt = ETT_NONE;
6898 	struct trace_buffer *buffer;
6899 	struct print_entry *entry;
6900 	int meta_size;
6901 	ssize_t written;
6902 	size_t size;
6903 	int len;
6904 
6905 /* Used in tracing_mark_raw_write() as well */
6906 #define FAULTED_STR "<faulted>"
6907 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6908 
6909 	if (tracing_disabled)
6910 		return -EINVAL;
6911 
6912 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6913 		return -EINVAL;
6914 
6915 	if ((ssize_t)cnt < 0)
6916 		return -EINVAL;
6917 
6918 	if (cnt > TRACE_MARKER_MAX_SIZE)
6919 		cnt = TRACE_MARKER_MAX_SIZE;
6920 
6921 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
6922  again:
6923 	size = cnt + meta_size;
6924 
6925 	/* If less than "<faulted>", then make sure we can still add that */
6926 	if (cnt < FAULTED_SIZE)
6927 		size += FAULTED_SIZE - cnt;
6928 
6929 	buffer = tr->array_buffer.buffer;
6930 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6931 					    tracing_gen_ctx());
6932 	if (unlikely(!event)) {
6933 		/*
6934 		 * If the size was greater than what was allowed, then
6935 		 * make it smaller and try again.
6936 		 */
6937 		if (size > ring_buffer_max_event_size(buffer)) {
6938 			/* cnt < FAULTED size should never be bigger than max */
6939 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
6940 				return -EBADF;
6941 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
6942 			/* The above should only happen once */
6943 			if (WARN_ON_ONCE(cnt + meta_size == size))
6944 				return -EBADF;
6945 			goto again;
6946 		}
6947 
6948 		/* Ring buffer disabled, return as if not open for write */
6949 		return -EBADF;
6950 	}
6951 
6952 	entry = ring_buffer_event_data(event);
6953 	entry->ip = _THIS_IP_;
6954 
6955 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6956 	if (len) {
6957 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6958 		cnt = FAULTED_SIZE;
6959 		written = -EFAULT;
6960 	} else
6961 		written = cnt;
6962 
6963 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6964 		/* do not add \n before testing triggers, but add \0 */
6965 		entry->buf[cnt] = '\0';
6966 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
6967 	}
6968 
6969 	if (entry->buf[cnt - 1] != '\n') {
6970 		entry->buf[cnt] = '\n';
6971 		entry->buf[cnt + 1] = '\0';
6972 	} else
6973 		entry->buf[cnt] = '\0';
6974 
6975 	if (static_branch_unlikely(&trace_marker_exports_enabled))
6976 		ftrace_exports(event, TRACE_EXPORT_MARKER);
6977 	__buffer_unlock_commit(buffer, event);
6978 
6979 	if (tt)
6980 		event_triggers_post_call(tr->trace_marker_file, tt);
6981 
6982 	return written;
6983 }
6984 
6985 static ssize_t
tracing_mark_raw_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)6986 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
6987 					size_t cnt, loff_t *fpos)
6988 {
6989 	struct trace_array *tr = filp->private_data;
6990 	struct ring_buffer_event *event;
6991 	struct trace_buffer *buffer;
6992 	struct raw_data_entry *entry;
6993 	ssize_t written;
6994 	int size;
6995 	int len;
6996 
6997 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
6998 
6999 	if (tracing_disabled)
7000 		return -EINVAL;
7001 
7002 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7003 		return -EINVAL;
7004 
7005 	/* The marker must at least have a tag id */
7006 	if (cnt < sizeof(unsigned int))
7007 		return -EINVAL;
7008 
7009 	size = sizeof(*entry) + cnt;
7010 	if (cnt < FAULT_SIZE_ID)
7011 		size += FAULT_SIZE_ID - cnt;
7012 
7013 	buffer = tr->array_buffer.buffer;
7014 
7015 	if (size > ring_buffer_max_event_size(buffer))
7016 		return -EINVAL;
7017 
7018 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7019 					    tracing_gen_ctx());
7020 	if (!event)
7021 		/* Ring buffer disabled, return as if not open for write */
7022 		return -EBADF;
7023 
7024 	entry = ring_buffer_event_data(event);
7025 
7026 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7027 	if (len) {
7028 		entry->id = -1;
7029 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7030 		written = -EFAULT;
7031 	} else
7032 		written = cnt;
7033 
7034 	__buffer_unlock_commit(buffer, event);
7035 
7036 	return written;
7037 }
7038 
tracing_clock_show(struct seq_file * m,void * v)7039 static int tracing_clock_show(struct seq_file *m, void *v)
7040 {
7041 	struct trace_array *tr = m->private;
7042 	int i;
7043 
7044 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7045 		seq_printf(m,
7046 			"%s%s%s%s", i ? " " : "",
7047 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7048 			i == tr->clock_id ? "]" : "");
7049 	seq_putc(m, '\n');
7050 
7051 	return 0;
7052 }
7053 
tracing_set_clock(struct trace_array * tr,const char * clockstr)7054 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7055 {
7056 	int i;
7057 
7058 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7059 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7060 			break;
7061 	}
7062 	if (i == ARRAY_SIZE(trace_clocks))
7063 		return -EINVAL;
7064 
7065 	mutex_lock(&trace_types_lock);
7066 
7067 	tr->clock_id = i;
7068 
7069 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7070 
7071 	/*
7072 	 * New clock may not be consistent with the previous clock.
7073 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7074 	 */
7075 	tracing_reset_online_cpus(&tr->array_buffer);
7076 
7077 #ifdef CONFIG_TRACER_MAX_TRACE
7078 	if (tr->max_buffer.buffer)
7079 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7080 	tracing_reset_online_cpus(&tr->max_buffer);
7081 #endif
7082 
7083 	mutex_unlock(&trace_types_lock);
7084 
7085 	return 0;
7086 }
7087 
tracing_clock_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * fpos)7088 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7089 				   size_t cnt, loff_t *fpos)
7090 {
7091 	struct seq_file *m = filp->private_data;
7092 	struct trace_array *tr = m->private;
7093 	char buf[64];
7094 	const char *clockstr;
7095 	int ret;
7096 
7097 	if (cnt >= sizeof(buf))
7098 		return -EINVAL;
7099 
7100 	if (copy_from_user(buf, ubuf, cnt))
7101 		return -EFAULT;
7102 
7103 	buf[cnt] = 0;
7104 
7105 	clockstr = strstrip(buf);
7106 
7107 	ret = tracing_set_clock(tr, clockstr);
7108 	if (ret)
7109 		return ret;
7110 
7111 	*fpos += cnt;
7112 
7113 	return cnt;
7114 }
7115 
tracing_clock_open(struct inode * inode,struct file * file)7116 static int tracing_clock_open(struct inode *inode, struct file *file)
7117 {
7118 	struct trace_array *tr = inode->i_private;
7119 	int ret;
7120 
7121 	ret = tracing_check_open_get_tr(tr);
7122 	if (ret)
7123 		return ret;
7124 
7125 	ret = single_open(file, tracing_clock_show, inode->i_private);
7126 	if (ret < 0)
7127 		trace_array_put(tr);
7128 
7129 	return ret;
7130 }
7131 
tracing_time_stamp_mode_show(struct seq_file * m,void * v)7132 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7133 {
7134 	struct trace_array *tr = m->private;
7135 
7136 	mutex_lock(&trace_types_lock);
7137 
7138 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7139 		seq_puts(m, "delta [absolute]\n");
7140 	else
7141 		seq_puts(m, "[delta] absolute\n");
7142 
7143 	mutex_unlock(&trace_types_lock);
7144 
7145 	return 0;
7146 }
7147 
tracing_time_stamp_mode_open(struct inode * inode,struct file * file)7148 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7149 {
7150 	struct trace_array *tr = inode->i_private;
7151 	int ret;
7152 
7153 	ret = tracing_check_open_get_tr(tr);
7154 	if (ret)
7155 		return ret;
7156 
7157 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7158 	if (ret < 0)
7159 		trace_array_put(tr);
7160 
7161 	return ret;
7162 }
7163 
tracing_event_time_stamp(struct trace_buffer * buffer,struct ring_buffer_event * rbe)7164 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7165 {
7166 	if (rbe == this_cpu_read(trace_buffered_event))
7167 		return ring_buffer_time_stamp(buffer);
7168 
7169 	return ring_buffer_event_time_stamp(buffer, rbe);
7170 }
7171 
7172 /*
7173  * Set or disable using the per CPU trace_buffer_event when possible.
7174  */
tracing_set_filter_buffering(struct trace_array * tr,bool set)7175 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7176 {
7177 	guard(mutex)(&trace_types_lock);
7178 
7179 	if (set && tr->no_filter_buffering_ref++)
7180 		return 0;
7181 
7182 	if (!set) {
7183 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref))
7184 			return -EINVAL;
7185 
7186 		--tr->no_filter_buffering_ref;
7187 	}
7188 
7189 	return 0;
7190 }
7191 
7192 struct ftrace_buffer_info {
7193 	struct trace_iterator	iter;
7194 	void			*spare;
7195 	unsigned int		spare_cpu;
7196 	unsigned int		spare_size;
7197 	unsigned int		read;
7198 };
7199 
7200 #ifdef CONFIG_TRACER_SNAPSHOT
tracing_snapshot_open(struct inode * inode,struct file * file)7201 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7202 {
7203 	struct trace_array *tr = inode->i_private;
7204 	struct trace_iterator *iter;
7205 	struct seq_file *m;
7206 	int ret;
7207 
7208 	ret = tracing_check_open_get_tr(tr);
7209 	if (ret)
7210 		return ret;
7211 
7212 	if (file->f_mode & FMODE_READ) {
7213 		iter = __tracing_open(inode, file, true);
7214 		if (IS_ERR(iter))
7215 			ret = PTR_ERR(iter);
7216 	} else {
7217 		/* Writes still need the seq_file to hold the private data */
7218 		ret = -ENOMEM;
7219 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7220 		if (!m)
7221 			goto out;
7222 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7223 		if (!iter) {
7224 			kfree(m);
7225 			goto out;
7226 		}
7227 		ret = 0;
7228 
7229 		iter->tr = tr;
7230 		iter->array_buffer = &tr->max_buffer;
7231 		iter->cpu_file = tracing_get_cpu(inode);
7232 		m->private = iter;
7233 		file->private_data = m;
7234 	}
7235 out:
7236 	if (ret < 0)
7237 		trace_array_put(tr);
7238 
7239 	return ret;
7240 }
7241 
tracing_swap_cpu_buffer(void * tr)7242 static void tracing_swap_cpu_buffer(void *tr)
7243 {
7244 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7245 }
7246 
7247 static ssize_t
tracing_snapshot_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7248 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7249 		       loff_t *ppos)
7250 {
7251 	struct seq_file *m = filp->private_data;
7252 	struct trace_iterator *iter = m->private;
7253 	struct trace_array *tr = iter->tr;
7254 	unsigned long val;
7255 	int ret;
7256 
7257 	ret = tracing_update_buffers(tr);
7258 	if (ret < 0)
7259 		return ret;
7260 
7261 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7262 	if (ret)
7263 		return ret;
7264 
7265 	guard(mutex)(&trace_types_lock);
7266 
7267 	if (tr->current_trace->use_max_tr)
7268 		return -EBUSY;
7269 
7270 	local_irq_disable();
7271 	arch_spin_lock(&tr->max_lock);
7272 	if (tr->cond_snapshot)
7273 		ret = -EBUSY;
7274 	arch_spin_unlock(&tr->max_lock);
7275 	local_irq_enable();
7276 	if (ret)
7277 		return ret;
7278 
7279 	switch (val) {
7280 	case 0:
7281 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7282 			return -EINVAL;
7283 		if (tr->allocated_snapshot)
7284 			free_snapshot(tr);
7285 		break;
7286 	case 1:
7287 /* Only allow per-cpu swap if the ring buffer supports it */
7288 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7289 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS)
7290 			return -EINVAL;
7291 #endif
7292 		if (tr->allocated_snapshot)
7293 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7294 					&tr->array_buffer, iter->cpu_file);
7295 
7296 		ret = tracing_arm_snapshot_locked(tr);
7297 		if (ret)
7298 			return ret;
7299 
7300 		/* Now, we're going to swap */
7301 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7302 			local_irq_disable();
7303 			update_max_tr(tr, current, smp_processor_id(), NULL);
7304 			local_irq_enable();
7305 		} else {
7306 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7307 						 (void *)tr, 1);
7308 		}
7309 		tracing_disarm_snapshot(tr);
7310 		break;
7311 	default:
7312 		if (tr->allocated_snapshot) {
7313 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7314 				tracing_reset_online_cpus(&tr->max_buffer);
7315 			else
7316 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7317 		}
7318 		break;
7319 	}
7320 
7321 	if (ret >= 0) {
7322 		*ppos += cnt;
7323 		ret = cnt;
7324 	}
7325 
7326 	return ret;
7327 }
7328 
tracing_snapshot_release(struct inode * inode,struct file * file)7329 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7330 {
7331 	struct seq_file *m = file->private_data;
7332 	int ret;
7333 
7334 	ret = tracing_release(inode, file);
7335 
7336 	if (file->f_mode & FMODE_READ)
7337 		return ret;
7338 
7339 	/* If write only, the seq_file is just a stub */
7340 	if (m)
7341 		kfree(m->private);
7342 	kfree(m);
7343 
7344 	return 0;
7345 }
7346 
7347 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7348 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7349 				    size_t count, loff_t *ppos);
7350 static int tracing_buffers_release(struct inode *inode, struct file *file);
7351 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7352 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7353 
snapshot_raw_open(struct inode * inode,struct file * filp)7354 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7355 {
7356 	struct ftrace_buffer_info *info;
7357 	int ret;
7358 
7359 	/* The following checks for tracefs lockdown */
7360 	ret = tracing_buffers_open(inode, filp);
7361 	if (ret < 0)
7362 		return ret;
7363 
7364 	info = filp->private_data;
7365 
7366 	if (info->iter.trace->use_max_tr) {
7367 		tracing_buffers_release(inode, filp);
7368 		return -EBUSY;
7369 	}
7370 
7371 	info->iter.snapshot = true;
7372 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7373 
7374 	return ret;
7375 }
7376 
7377 #endif /* CONFIG_TRACER_SNAPSHOT */
7378 
7379 
7380 static const struct file_operations tracing_thresh_fops = {
7381 	.open		= tracing_open_generic,
7382 	.read		= tracing_thresh_read,
7383 	.write		= tracing_thresh_write,
7384 	.llseek		= generic_file_llseek,
7385 };
7386 
7387 #ifdef CONFIG_TRACER_MAX_TRACE
7388 static const struct file_operations tracing_max_lat_fops = {
7389 	.open		= tracing_open_generic_tr,
7390 	.read		= tracing_max_lat_read,
7391 	.write		= tracing_max_lat_write,
7392 	.llseek		= generic_file_llseek,
7393 	.release	= tracing_release_generic_tr,
7394 };
7395 #endif
7396 
7397 static const struct file_operations set_tracer_fops = {
7398 	.open		= tracing_open_generic_tr,
7399 	.read		= tracing_set_trace_read,
7400 	.write		= tracing_set_trace_write,
7401 	.llseek		= generic_file_llseek,
7402 	.release	= tracing_release_generic_tr,
7403 };
7404 
7405 static const struct file_operations tracing_pipe_fops = {
7406 	.open		= tracing_open_pipe,
7407 	.poll		= tracing_poll_pipe,
7408 	.read		= tracing_read_pipe,
7409 	.splice_read	= tracing_splice_read_pipe,
7410 	.release	= tracing_release_pipe,
7411 };
7412 
7413 static const struct file_operations tracing_entries_fops = {
7414 	.open		= tracing_open_generic_tr,
7415 	.read		= tracing_entries_read,
7416 	.write		= tracing_entries_write,
7417 	.llseek		= generic_file_llseek,
7418 	.release	= tracing_release_generic_tr,
7419 };
7420 
7421 static const struct file_operations tracing_buffer_meta_fops = {
7422 	.open		= tracing_buffer_meta_open,
7423 	.read		= seq_read,
7424 	.llseek		= seq_lseek,
7425 	.release	= tracing_seq_release,
7426 };
7427 
7428 static const struct file_operations tracing_total_entries_fops = {
7429 	.open		= tracing_open_generic_tr,
7430 	.read		= tracing_total_entries_read,
7431 	.llseek		= generic_file_llseek,
7432 	.release	= tracing_release_generic_tr,
7433 };
7434 
7435 static const struct file_operations tracing_free_buffer_fops = {
7436 	.open		= tracing_open_generic_tr,
7437 	.write		= tracing_free_buffer_write,
7438 	.release	= tracing_free_buffer_release,
7439 };
7440 
7441 static const struct file_operations tracing_mark_fops = {
7442 	.open		= tracing_mark_open,
7443 	.write		= tracing_mark_write,
7444 	.release	= tracing_release_generic_tr,
7445 };
7446 
7447 static const struct file_operations tracing_mark_raw_fops = {
7448 	.open		= tracing_mark_open,
7449 	.write		= tracing_mark_raw_write,
7450 	.release	= tracing_release_generic_tr,
7451 };
7452 
7453 static const struct file_operations trace_clock_fops = {
7454 	.open		= tracing_clock_open,
7455 	.read		= seq_read,
7456 	.llseek		= seq_lseek,
7457 	.release	= tracing_single_release_tr,
7458 	.write		= tracing_clock_write,
7459 };
7460 
7461 static const struct file_operations trace_time_stamp_mode_fops = {
7462 	.open		= tracing_time_stamp_mode_open,
7463 	.read		= seq_read,
7464 	.llseek		= seq_lseek,
7465 	.release	= tracing_single_release_tr,
7466 };
7467 
7468 static const struct file_operations last_boot_fops = {
7469 	.open		= tracing_open_generic_tr,
7470 	.read		= tracing_last_boot_read,
7471 	.llseek		= generic_file_llseek,
7472 	.release	= tracing_release_generic_tr,
7473 };
7474 
7475 #ifdef CONFIG_TRACER_SNAPSHOT
7476 static const struct file_operations snapshot_fops = {
7477 	.open		= tracing_snapshot_open,
7478 	.read		= seq_read,
7479 	.write		= tracing_snapshot_write,
7480 	.llseek		= tracing_lseek,
7481 	.release	= tracing_snapshot_release,
7482 };
7483 
7484 static const struct file_operations snapshot_raw_fops = {
7485 	.open		= snapshot_raw_open,
7486 	.read		= tracing_buffers_read,
7487 	.release	= tracing_buffers_release,
7488 	.splice_read	= tracing_buffers_splice_read,
7489 };
7490 
7491 #endif /* CONFIG_TRACER_SNAPSHOT */
7492 
7493 /*
7494  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7495  * @filp: The active open file structure
7496  * @ubuf: The userspace provided buffer to read value into
7497  * @cnt: The maximum number of bytes to read
7498  * @ppos: The current "file" position
7499  *
7500  * This function implements the write interface for a struct trace_min_max_param.
7501  * The filp->private_data must point to a trace_min_max_param structure that
7502  * defines where to write the value, the min and the max acceptable values,
7503  * and a lock to protect the write.
7504  */
7505 static ssize_t
trace_min_max_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)7506 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7507 {
7508 	struct trace_min_max_param *param = filp->private_data;
7509 	u64 val;
7510 	int err;
7511 
7512 	if (!param)
7513 		return -EFAULT;
7514 
7515 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7516 	if (err)
7517 		return err;
7518 
7519 	if (param->lock)
7520 		mutex_lock(param->lock);
7521 
7522 	if (param->min && val < *param->min)
7523 		err = -EINVAL;
7524 
7525 	if (param->max && val > *param->max)
7526 		err = -EINVAL;
7527 
7528 	if (!err)
7529 		*param->val = val;
7530 
7531 	if (param->lock)
7532 		mutex_unlock(param->lock);
7533 
7534 	if (err)
7535 		return err;
7536 
7537 	return cnt;
7538 }
7539 
7540 /*
7541  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7542  * @filp: The active open file structure
7543  * @ubuf: The userspace provided buffer to read value into
7544  * @cnt: The maximum number of bytes to read
7545  * @ppos: The current "file" position
7546  *
7547  * This function implements the read interface for a struct trace_min_max_param.
7548  * The filp->private_data must point to a trace_min_max_param struct with valid
7549  * data.
7550  */
7551 static ssize_t
trace_min_max_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)7552 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7553 {
7554 	struct trace_min_max_param *param = filp->private_data;
7555 	char buf[U64_STR_SIZE];
7556 	int len;
7557 	u64 val;
7558 
7559 	if (!param)
7560 		return -EFAULT;
7561 
7562 	val = *param->val;
7563 
7564 	if (cnt > sizeof(buf))
7565 		cnt = sizeof(buf);
7566 
7567 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7568 
7569 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7570 }
7571 
7572 const struct file_operations trace_min_max_fops = {
7573 	.open		= tracing_open_generic,
7574 	.read		= trace_min_max_read,
7575 	.write		= trace_min_max_write,
7576 };
7577 
7578 #define TRACING_LOG_ERRS_MAX	8
7579 #define TRACING_LOG_LOC_MAX	128
7580 
7581 #define CMD_PREFIX "  Command: "
7582 
7583 struct err_info {
7584 	const char	**errs;	/* ptr to loc-specific array of err strings */
7585 	u8		type;	/* index into errs -> specific err string */
7586 	u16		pos;	/* caret position */
7587 	u64		ts;
7588 };
7589 
7590 struct tracing_log_err {
7591 	struct list_head	list;
7592 	struct err_info		info;
7593 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7594 	char			*cmd;                     /* what caused err */
7595 };
7596 
7597 static DEFINE_MUTEX(tracing_err_log_lock);
7598 
alloc_tracing_log_err(int len)7599 static struct tracing_log_err *alloc_tracing_log_err(int len)
7600 {
7601 	struct tracing_log_err *err;
7602 
7603 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7604 	if (!err)
7605 		return ERR_PTR(-ENOMEM);
7606 
7607 	err->cmd = kzalloc(len, GFP_KERNEL);
7608 	if (!err->cmd) {
7609 		kfree(err);
7610 		return ERR_PTR(-ENOMEM);
7611 	}
7612 
7613 	return err;
7614 }
7615 
free_tracing_log_err(struct tracing_log_err * err)7616 static void free_tracing_log_err(struct tracing_log_err *err)
7617 {
7618 	kfree(err->cmd);
7619 	kfree(err);
7620 }
7621 
get_tracing_log_err(struct trace_array * tr,int len)7622 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7623 						   int len)
7624 {
7625 	struct tracing_log_err *err;
7626 	char *cmd;
7627 
7628 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7629 		err = alloc_tracing_log_err(len);
7630 		if (PTR_ERR(err) != -ENOMEM)
7631 			tr->n_err_log_entries++;
7632 
7633 		return err;
7634 	}
7635 	cmd = kzalloc(len, GFP_KERNEL);
7636 	if (!cmd)
7637 		return ERR_PTR(-ENOMEM);
7638 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7639 	kfree(err->cmd);
7640 	err->cmd = cmd;
7641 	list_del(&err->list);
7642 
7643 	return err;
7644 }
7645 
7646 /**
7647  * err_pos - find the position of a string within a command for error careting
7648  * @cmd: The tracing command that caused the error
7649  * @str: The string to position the caret at within @cmd
7650  *
7651  * Finds the position of the first occurrence of @str within @cmd.  The
7652  * return value can be passed to tracing_log_err() for caret placement
7653  * within @cmd.
7654  *
7655  * Returns the index within @cmd of the first occurrence of @str or 0
7656  * if @str was not found.
7657  */
err_pos(char * cmd,const char * str)7658 unsigned int err_pos(char *cmd, const char *str)
7659 {
7660 	char *found;
7661 
7662 	if (WARN_ON(!strlen(cmd)))
7663 		return 0;
7664 
7665 	found = strstr(cmd, str);
7666 	if (found)
7667 		return found - cmd;
7668 
7669 	return 0;
7670 }
7671 
7672 /**
7673  * tracing_log_err - write an error to the tracing error log
7674  * @tr: The associated trace array for the error (NULL for top level array)
7675  * @loc: A string describing where the error occurred
7676  * @cmd: The tracing command that caused the error
7677  * @errs: The array of loc-specific static error strings
7678  * @type: The index into errs[], which produces the specific static err string
7679  * @pos: The position the caret should be placed in the cmd
7680  *
7681  * Writes an error into tracing/error_log of the form:
7682  *
7683  * <loc>: error: <text>
7684  *   Command: <cmd>
7685  *              ^
7686  *
7687  * tracing/error_log is a small log file containing the last
7688  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7689  * unless there has been a tracing error, and the error log can be
7690  * cleared and have its memory freed by writing the empty string in
7691  * truncation mode to it i.e. echo > tracing/error_log.
7692  *
7693  * NOTE: the @errs array along with the @type param are used to
7694  * produce a static error string - this string is not copied and saved
7695  * when the error is logged - only a pointer to it is saved.  See
7696  * existing callers for examples of how static strings are typically
7697  * defined for use with tracing_log_err().
7698  */
tracing_log_err(struct trace_array * tr,const char * loc,const char * cmd,const char ** errs,u8 type,u16 pos)7699 void tracing_log_err(struct trace_array *tr,
7700 		     const char *loc, const char *cmd,
7701 		     const char **errs, u8 type, u16 pos)
7702 {
7703 	struct tracing_log_err *err;
7704 	int len = 0;
7705 
7706 	if (!tr)
7707 		tr = &global_trace;
7708 
7709 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7710 
7711 	guard(mutex)(&tracing_err_log_lock);
7712 
7713 	err = get_tracing_log_err(tr, len);
7714 	if (PTR_ERR(err) == -ENOMEM)
7715 		return;
7716 
7717 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7718 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7719 
7720 	err->info.errs = errs;
7721 	err->info.type = type;
7722 	err->info.pos = pos;
7723 	err->info.ts = local_clock();
7724 
7725 	list_add_tail(&err->list, &tr->err_log);
7726 }
7727 
clear_tracing_err_log(struct trace_array * tr)7728 static void clear_tracing_err_log(struct trace_array *tr)
7729 {
7730 	struct tracing_log_err *err, *next;
7731 
7732 	mutex_lock(&tracing_err_log_lock);
7733 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7734 		list_del(&err->list);
7735 		free_tracing_log_err(err);
7736 	}
7737 
7738 	tr->n_err_log_entries = 0;
7739 	mutex_unlock(&tracing_err_log_lock);
7740 }
7741 
tracing_err_log_seq_start(struct seq_file * m,loff_t * pos)7742 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7743 {
7744 	struct trace_array *tr = m->private;
7745 
7746 	mutex_lock(&tracing_err_log_lock);
7747 
7748 	return seq_list_start(&tr->err_log, *pos);
7749 }
7750 
tracing_err_log_seq_next(struct seq_file * m,void * v,loff_t * pos)7751 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7752 {
7753 	struct trace_array *tr = m->private;
7754 
7755 	return seq_list_next(v, &tr->err_log, pos);
7756 }
7757 
tracing_err_log_seq_stop(struct seq_file * m,void * v)7758 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7759 {
7760 	mutex_unlock(&tracing_err_log_lock);
7761 }
7762 
tracing_err_log_show_pos(struct seq_file * m,u16 pos)7763 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7764 {
7765 	u16 i;
7766 
7767 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7768 		seq_putc(m, ' ');
7769 	for (i = 0; i < pos; i++)
7770 		seq_putc(m, ' ');
7771 	seq_puts(m, "^\n");
7772 }
7773 
tracing_err_log_seq_show(struct seq_file * m,void * v)7774 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7775 {
7776 	struct tracing_log_err *err = v;
7777 
7778 	if (err) {
7779 		const char *err_text = err->info.errs[err->info.type];
7780 		u64 sec = err->info.ts;
7781 		u32 nsec;
7782 
7783 		nsec = do_div(sec, NSEC_PER_SEC);
7784 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7785 			   err->loc, err_text);
7786 		seq_printf(m, "%s", err->cmd);
7787 		tracing_err_log_show_pos(m, err->info.pos);
7788 	}
7789 
7790 	return 0;
7791 }
7792 
7793 static const struct seq_operations tracing_err_log_seq_ops = {
7794 	.start  = tracing_err_log_seq_start,
7795 	.next   = tracing_err_log_seq_next,
7796 	.stop   = tracing_err_log_seq_stop,
7797 	.show   = tracing_err_log_seq_show
7798 };
7799 
tracing_err_log_open(struct inode * inode,struct file * file)7800 static int tracing_err_log_open(struct inode *inode, struct file *file)
7801 {
7802 	struct trace_array *tr = inode->i_private;
7803 	int ret = 0;
7804 
7805 	ret = tracing_check_open_get_tr(tr);
7806 	if (ret)
7807 		return ret;
7808 
7809 	/* If this file was opened for write, then erase contents */
7810 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7811 		clear_tracing_err_log(tr);
7812 
7813 	if (file->f_mode & FMODE_READ) {
7814 		ret = seq_open(file, &tracing_err_log_seq_ops);
7815 		if (!ret) {
7816 			struct seq_file *m = file->private_data;
7817 			m->private = tr;
7818 		} else {
7819 			trace_array_put(tr);
7820 		}
7821 	}
7822 	return ret;
7823 }
7824 
tracing_err_log_write(struct file * file,const char __user * buffer,size_t count,loff_t * ppos)7825 static ssize_t tracing_err_log_write(struct file *file,
7826 				     const char __user *buffer,
7827 				     size_t count, loff_t *ppos)
7828 {
7829 	return count;
7830 }
7831 
tracing_err_log_release(struct inode * inode,struct file * file)7832 static int tracing_err_log_release(struct inode *inode, struct file *file)
7833 {
7834 	struct trace_array *tr = inode->i_private;
7835 
7836 	trace_array_put(tr);
7837 
7838 	if (file->f_mode & FMODE_READ)
7839 		seq_release(inode, file);
7840 
7841 	return 0;
7842 }
7843 
7844 static const struct file_operations tracing_err_log_fops = {
7845 	.open           = tracing_err_log_open,
7846 	.write		= tracing_err_log_write,
7847 	.read           = seq_read,
7848 	.llseek         = tracing_lseek,
7849 	.release        = tracing_err_log_release,
7850 };
7851 
tracing_buffers_open(struct inode * inode,struct file * filp)7852 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7853 {
7854 	struct trace_array *tr = inode->i_private;
7855 	struct ftrace_buffer_info *info;
7856 	int ret;
7857 
7858 	ret = tracing_check_open_get_tr(tr);
7859 	if (ret)
7860 		return ret;
7861 
7862 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7863 	if (!info) {
7864 		trace_array_put(tr);
7865 		return -ENOMEM;
7866 	}
7867 
7868 	mutex_lock(&trace_types_lock);
7869 
7870 	info->iter.tr		= tr;
7871 	info->iter.cpu_file	= tracing_get_cpu(inode);
7872 	info->iter.trace	= tr->current_trace;
7873 	info->iter.array_buffer = &tr->array_buffer;
7874 	info->spare		= NULL;
7875 	/* Force reading ring buffer for first read */
7876 	info->read		= (unsigned int)-1;
7877 
7878 	filp->private_data = info;
7879 
7880 	tr->trace_ref++;
7881 
7882 	mutex_unlock(&trace_types_lock);
7883 
7884 	ret = nonseekable_open(inode, filp);
7885 	if (ret < 0)
7886 		trace_array_put(tr);
7887 
7888 	return ret;
7889 }
7890 
7891 static __poll_t
tracing_buffers_poll(struct file * filp,poll_table * poll_table)7892 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7893 {
7894 	struct ftrace_buffer_info *info = filp->private_data;
7895 	struct trace_iterator *iter = &info->iter;
7896 
7897 	return trace_poll(iter, filp, poll_table);
7898 }
7899 
7900 static ssize_t
tracing_buffers_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)7901 tracing_buffers_read(struct file *filp, char __user *ubuf,
7902 		     size_t count, loff_t *ppos)
7903 {
7904 	struct ftrace_buffer_info *info = filp->private_data;
7905 	struct trace_iterator *iter = &info->iter;
7906 	void *trace_data;
7907 	int page_size;
7908 	ssize_t ret = 0;
7909 	ssize_t size;
7910 
7911 	if (!count)
7912 		return 0;
7913 
7914 #ifdef CONFIG_TRACER_MAX_TRACE
7915 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7916 		return -EBUSY;
7917 #endif
7918 
7919 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
7920 
7921 	/* Make sure the spare matches the current sub buffer size */
7922 	if (info->spare) {
7923 		if (page_size != info->spare_size) {
7924 			ring_buffer_free_read_page(iter->array_buffer->buffer,
7925 						   info->spare_cpu, info->spare);
7926 			info->spare = NULL;
7927 		}
7928 	}
7929 
7930 	if (!info->spare) {
7931 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7932 							  iter->cpu_file);
7933 		if (IS_ERR(info->spare)) {
7934 			ret = PTR_ERR(info->spare);
7935 			info->spare = NULL;
7936 		} else {
7937 			info->spare_cpu = iter->cpu_file;
7938 			info->spare_size = page_size;
7939 		}
7940 	}
7941 	if (!info->spare)
7942 		return ret;
7943 
7944 	/* Do we have previous read data to read? */
7945 	if (info->read < page_size)
7946 		goto read;
7947 
7948  again:
7949 	trace_access_lock(iter->cpu_file);
7950 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7951 				    info->spare,
7952 				    count,
7953 				    iter->cpu_file, 0);
7954 	trace_access_unlock(iter->cpu_file);
7955 
7956 	if (ret < 0) {
7957 		if (trace_empty(iter) && !iter->closed) {
7958 			if ((filp->f_flags & O_NONBLOCK))
7959 				return -EAGAIN;
7960 
7961 			ret = wait_on_pipe(iter, 0);
7962 			if (ret)
7963 				return ret;
7964 
7965 			goto again;
7966 		}
7967 		return 0;
7968 	}
7969 
7970 	info->read = 0;
7971  read:
7972 	size = page_size - info->read;
7973 	if (size > count)
7974 		size = count;
7975 	trace_data = ring_buffer_read_page_data(info->spare);
7976 	ret = copy_to_user(ubuf, trace_data + info->read, size);
7977 	if (ret == size)
7978 		return -EFAULT;
7979 
7980 	size -= ret;
7981 
7982 	*ppos += size;
7983 	info->read += size;
7984 
7985 	return size;
7986 }
7987 
tracing_buffers_flush(struct file * file,fl_owner_t id)7988 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
7989 {
7990 	struct ftrace_buffer_info *info = file->private_data;
7991 	struct trace_iterator *iter = &info->iter;
7992 
7993 	iter->closed = true;
7994 	/* Make sure the waiters see the new wait_index */
7995 	(void)atomic_fetch_inc_release(&iter->wait_index);
7996 
7997 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
7998 
7999 	return 0;
8000 }
8001 
tracing_buffers_release(struct inode * inode,struct file * file)8002 static int tracing_buffers_release(struct inode *inode, struct file *file)
8003 {
8004 	struct ftrace_buffer_info *info = file->private_data;
8005 	struct trace_iterator *iter = &info->iter;
8006 
8007 	mutex_lock(&trace_types_lock);
8008 
8009 	iter->tr->trace_ref--;
8010 
8011 	__trace_array_put(iter->tr);
8012 
8013 	if (info->spare)
8014 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8015 					   info->spare_cpu, info->spare);
8016 	kvfree(info);
8017 
8018 	mutex_unlock(&trace_types_lock);
8019 
8020 	return 0;
8021 }
8022 
8023 struct buffer_ref {
8024 	struct trace_buffer	*buffer;
8025 	void			*page;
8026 	int			cpu;
8027 	refcount_t		refcount;
8028 };
8029 
buffer_ref_release(struct buffer_ref * ref)8030 static void buffer_ref_release(struct buffer_ref *ref)
8031 {
8032 	if (!refcount_dec_and_test(&ref->refcount))
8033 		return;
8034 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8035 	kfree(ref);
8036 }
8037 
buffer_pipe_buf_release(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8038 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8039 				    struct pipe_buffer *buf)
8040 {
8041 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8042 
8043 	buffer_ref_release(ref);
8044 	buf->private = 0;
8045 }
8046 
buffer_pipe_buf_get(struct pipe_inode_info * pipe,struct pipe_buffer * buf)8047 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8048 				struct pipe_buffer *buf)
8049 {
8050 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8051 
8052 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8053 		return false;
8054 
8055 	refcount_inc(&ref->refcount);
8056 	return true;
8057 }
8058 
8059 /* Pipe buffer operations for a buffer. */
8060 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8061 	.release		= buffer_pipe_buf_release,
8062 	.get			= buffer_pipe_buf_get,
8063 };
8064 
8065 /*
8066  * Callback from splice_to_pipe(), if we need to release some pages
8067  * at the end of the spd in case we error'ed out in filling the pipe.
8068  */
buffer_spd_release(struct splice_pipe_desc * spd,unsigned int i)8069 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8070 {
8071 	struct buffer_ref *ref =
8072 		(struct buffer_ref *)spd->partial[i].private;
8073 
8074 	buffer_ref_release(ref);
8075 	spd->partial[i].private = 0;
8076 }
8077 
8078 static ssize_t
tracing_buffers_splice_read(struct file * file,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)8079 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8080 			    struct pipe_inode_info *pipe, size_t len,
8081 			    unsigned int flags)
8082 {
8083 	struct ftrace_buffer_info *info = file->private_data;
8084 	struct trace_iterator *iter = &info->iter;
8085 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8086 	struct page *pages_def[PIPE_DEF_BUFFERS];
8087 	struct splice_pipe_desc spd = {
8088 		.pages		= pages_def,
8089 		.partial	= partial_def,
8090 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8091 		.ops		= &buffer_pipe_buf_ops,
8092 		.spd_release	= buffer_spd_release,
8093 	};
8094 	struct buffer_ref *ref;
8095 	bool woken = false;
8096 	int page_size;
8097 	int entries, i;
8098 	ssize_t ret = 0;
8099 
8100 #ifdef CONFIG_TRACER_MAX_TRACE
8101 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8102 		return -EBUSY;
8103 #endif
8104 
8105 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8106 	if (*ppos & (page_size - 1))
8107 		return -EINVAL;
8108 
8109 	if (len & (page_size - 1)) {
8110 		if (len < page_size)
8111 			return -EINVAL;
8112 		len &= (~(page_size - 1));
8113 	}
8114 
8115 	if (splice_grow_spd(pipe, &spd))
8116 		return -ENOMEM;
8117 
8118  again:
8119 	trace_access_lock(iter->cpu_file);
8120 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8121 
8122 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8123 		struct page *page;
8124 		int r;
8125 
8126 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8127 		if (!ref) {
8128 			ret = -ENOMEM;
8129 			break;
8130 		}
8131 
8132 		refcount_set(&ref->refcount, 1);
8133 		ref->buffer = iter->array_buffer->buffer;
8134 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8135 		if (IS_ERR(ref->page)) {
8136 			ret = PTR_ERR(ref->page);
8137 			ref->page = NULL;
8138 			kfree(ref);
8139 			break;
8140 		}
8141 		ref->cpu = iter->cpu_file;
8142 
8143 		r = ring_buffer_read_page(ref->buffer, ref->page,
8144 					  len, iter->cpu_file, 1);
8145 		if (r < 0) {
8146 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8147 						   ref->page);
8148 			kfree(ref);
8149 			break;
8150 		}
8151 
8152 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8153 
8154 		spd.pages[i] = page;
8155 		spd.partial[i].len = page_size;
8156 		spd.partial[i].offset = 0;
8157 		spd.partial[i].private = (unsigned long)ref;
8158 		spd.nr_pages++;
8159 		*ppos += page_size;
8160 
8161 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8162 	}
8163 
8164 	trace_access_unlock(iter->cpu_file);
8165 	spd.nr_pages = i;
8166 
8167 	/* did we read anything? */
8168 	if (!spd.nr_pages) {
8169 
8170 		if (ret)
8171 			goto out;
8172 
8173 		if (woken)
8174 			goto out;
8175 
8176 		ret = -EAGAIN;
8177 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8178 			goto out;
8179 
8180 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8181 		if (ret)
8182 			goto out;
8183 
8184 		/* No need to wait after waking up when tracing is off */
8185 		if (!tracer_tracing_is_on(iter->tr))
8186 			goto out;
8187 
8188 		/* Iterate one more time to collect any new data then exit */
8189 		woken = true;
8190 
8191 		goto again;
8192 	}
8193 
8194 	ret = splice_to_pipe(pipe, &spd);
8195 out:
8196 	splice_shrink_spd(&spd);
8197 
8198 	return ret;
8199 }
8200 
tracing_buffers_ioctl(struct file * file,unsigned int cmd,unsigned long arg)8201 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8202 {
8203 	struct ftrace_buffer_info *info = file->private_data;
8204 	struct trace_iterator *iter = &info->iter;
8205 	int err;
8206 
8207 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8208 		if (!(file->f_flags & O_NONBLOCK)) {
8209 			err = ring_buffer_wait(iter->array_buffer->buffer,
8210 					       iter->cpu_file,
8211 					       iter->tr->buffer_percent,
8212 					       NULL, NULL);
8213 			if (err)
8214 				return err;
8215 		}
8216 
8217 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8218 						  iter->cpu_file);
8219 	} else if (cmd) {
8220 		return -ENOTTY;
8221 	}
8222 
8223 	/*
8224 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8225 	 * waiters
8226 	 */
8227 	mutex_lock(&trace_types_lock);
8228 
8229 	/* Make sure the waiters see the new wait_index */
8230 	(void)atomic_fetch_inc_release(&iter->wait_index);
8231 
8232 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8233 
8234 	mutex_unlock(&trace_types_lock);
8235 	return 0;
8236 }
8237 
8238 #ifdef CONFIG_TRACER_MAX_TRACE
get_snapshot_map(struct trace_array * tr)8239 static int get_snapshot_map(struct trace_array *tr)
8240 {
8241 	int err = 0;
8242 
8243 	/*
8244 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8245 	 * take trace_types_lock. Instead use the specific
8246 	 * snapshot_trigger_lock.
8247 	 */
8248 	spin_lock(&tr->snapshot_trigger_lock);
8249 
8250 	if (tr->snapshot || tr->mapped == UINT_MAX)
8251 		err = -EBUSY;
8252 	else
8253 		tr->mapped++;
8254 
8255 	spin_unlock(&tr->snapshot_trigger_lock);
8256 
8257 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8258 	if (tr->mapped == 1)
8259 		synchronize_rcu();
8260 
8261 	return err;
8262 
8263 }
put_snapshot_map(struct trace_array * tr)8264 static void put_snapshot_map(struct trace_array *tr)
8265 {
8266 	spin_lock(&tr->snapshot_trigger_lock);
8267 	if (!WARN_ON(!tr->mapped))
8268 		tr->mapped--;
8269 	spin_unlock(&tr->snapshot_trigger_lock);
8270 }
8271 #else
get_snapshot_map(struct trace_array * tr)8272 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
put_snapshot_map(struct trace_array * tr)8273 static inline void put_snapshot_map(struct trace_array *tr) { }
8274 #endif
8275 
tracing_buffers_mmap_close(struct vm_area_struct * vma)8276 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8277 {
8278 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8279 	struct trace_iterator *iter = &info->iter;
8280 
8281 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8282 	put_snapshot_map(iter->tr);
8283 }
8284 
8285 static const struct vm_operations_struct tracing_buffers_vmops = {
8286 	.close		= tracing_buffers_mmap_close,
8287 };
8288 
tracing_buffers_mmap(struct file * filp,struct vm_area_struct * vma)8289 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8290 {
8291 	struct ftrace_buffer_info *info = filp->private_data;
8292 	struct trace_iterator *iter = &info->iter;
8293 	int ret = 0;
8294 
8295 	/* Currently the boot mapped buffer is not supported for mmap */
8296 	if (iter->tr->flags & TRACE_ARRAY_FL_BOOT)
8297 		return -ENODEV;
8298 
8299 	ret = get_snapshot_map(iter->tr);
8300 	if (ret)
8301 		return ret;
8302 
8303 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8304 	if (ret)
8305 		put_snapshot_map(iter->tr);
8306 
8307 	vma->vm_ops = &tracing_buffers_vmops;
8308 
8309 	return ret;
8310 }
8311 
8312 static const struct file_operations tracing_buffers_fops = {
8313 	.open		= tracing_buffers_open,
8314 	.read		= tracing_buffers_read,
8315 	.poll		= tracing_buffers_poll,
8316 	.release	= tracing_buffers_release,
8317 	.flush		= tracing_buffers_flush,
8318 	.splice_read	= tracing_buffers_splice_read,
8319 	.unlocked_ioctl = tracing_buffers_ioctl,
8320 	.mmap		= tracing_buffers_mmap,
8321 };
8322 
8323 static ssize_t
tracing_stats_read(struct file * filp,char __user * ubuf,size_t count,loff_t * ppos)8324 tracing_stats_read(struct file *filp, char __user *ubuf,
8325 		   size_t count, loff_t *ppos)
8326 {
8327 	struct inode *inode = file_inode(filp);
8328 	struct trace_array *tr = inode->i_private;
8329 	struct array_buffer *trace_buf = &tr->array_buffer;
8330 	int cpu = tracing_get_cpu(inode);
8331 	struct trace_seq *s;
8332 	unsigned long cnt;
8333 	unsigned long long t;
8334 	unsigned long usec_rem;
8335 
8336 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8337 	if (!s)
8338 		return -ENOMEM;
8339 
8340 	trace_seq_init(s);
8341 
8342 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8343 	trace_seq_printf(s, "entries: %ld\n", cnt);
8344 
8345 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8346 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8347 
8348 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8349 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8350 
8351 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8352 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8353 
8354 	if (trace_clocks[tr->clock_id].in_ns) {
8355 		/* local or global for trace_clock */
8356 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8357 		usec_rem = do_div(t, USEC_PER_SEC);
8358 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8359 								t, usec_rem);
8360 
8361 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8362 		usec_rem = do_div(t, USEC_PER_SEC);
8363 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8364 	} else {
8365 		/* counter or tsc mode for trace_clock */
8366 		trace_seq_printf(s, "oldest event ts: %llu\n",
8367 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8368 
8369 		trace_seq_printf(s, "now ts: %llu\n",
8370 				ring_buffer_time_stamp(trace_buf->buffer));
8371 	}
8372 
8373 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8374 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8375 
8376 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8377 	trace_seq_printf(s, "read events: %ld\n", cnt);
8378 
8379 	count = simple_read_from_buffer(ubuf, count, ppos,
8380 					s->buffer, trace_seq_used(s));
8381 
8382 	kfree(s);
8383 
8384 	return count;
8385 }
8386 
8387 static const struct file_operations tracing_stats_fops = {
8388 	.open		= tracing_open_generic_tr,
8389 	.read		= tracing_stats_read,
8390 	.llseek		= generic_file_llseek,
8391 	.release	= tracing_release_generic_tr,
8392 };
8393 
8394 #ifdef CONFIG_DYNAMIC_FTRACE
8395 
8396 static ssize_t
tracing_read_dyn_info(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8397 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8398 		  size_t cnt, loff_t *ppos)
8399 {
8400 	ssize_t ret;
8401 	char *buf;
8402 	int r;
8403 
8404 	/* 512 should be plenty to hold the amount needed */
8405 #define DYN_INFO_BUF_SIZE	512
8406 
8407 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8408 	if (!buf)
8409 		return -ENOMEM;
8410 
8411 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8412 		      "%ld pages:%ld groups: %ld\n"
8413 		      "ftrace boot update time = %llu (ns)\n"
8414 		      "ftrace module total update time = %llu (ns)\n",
8415 		      ftrace_update_tot_cnt,
8416 		      ftrace_number_of_pages,
8417 		      ftrace_number_of_groups,
8418 		      ftrace_update_time,
8419 		      ftrace_total_mod_time);
8420 
8421 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8422 	kfree(buf);
8423 	return ret;
8424 }
8425 
8426 static const struct file_operations tracing_dyn_info_fops = {
8427 	.open		= tracing_open_generic,
8428 	.read		= tracing_read_dyn_info,
8429 	.llseek		= generic_file_llseek,
8430 };
8431 #endif /* CONFIG_DYNAMIC_FTRACE */
8432 
8433 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8434 static void
ftrace_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8435 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8436 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8437 		void *data)
8438 {
8439 	tracing_snapshot_instance(tr);
8440 }
8441 
8442 static void
ftrace_count_snapshot(unsigned long ip,unsigned long parent_ip,struct trace_array * tr,struct ftrace_probe_ops * ops,void * data)8443 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8444 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8445 		      void *data)
8446 {
8447 	struct ftrace_func_mapper *mapper = data;
8448 	long *count = NULL;
8449 
8450 	if (mapper)
8451 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8452 
8453 	if (count) {
8454 
8455 		if (*count <= 0)
8456 			return;
8457 
8458 		(*count)--;
8459 	}
8460 
8461 	tracing_snapshot_instance(tr);
8462 }
8463 
8464 static int
ftrace_snapshot_print(struct seq_file * m,unsigned long ip,struct ftrace_probe_ops * ops,void * data)8465 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8466 		      struct ftrace_probe_ops *ops, void *data)
8467 {
8468 	struct ftrace_func_mapper *mapper = data;
8469 	long *count = NULL;
8470 
8471 	seq_printf(m, "%ps:", (void *)ip);
8472 
8473 	seq_puts(m, "snapshot");
8474 
8475 	if (mapper)
8476 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8477 
8478 	if (count)
8479 		seq_printf(m, ":count=%ld\n", *count);
8480 	else
8481 		seq_puts(m, ":unlimited\n");
8482 
8483 	return 0;
8484 }
8485 
8486 static int
ftrace_snapshot_init(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * init_data,void ** data)8487 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8488 		     unsigned long ip, void *init_data, void **data)
8489 {
8490 	struct ftrace_func_mapper *mapper = *data;
8491 
8492 	if (!mapper) {
8493 		mapper = allocate_ftrace_func_mapper();
8494 		if (!mapper)
8495 			return -ENOMEM;
8496 		*data = mapper;
8497 	}
8498 
8499 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8500 }
8501 
8502 static void
ftrace_snapshot_free(struct ftrace_probe_ops * ops,struct trace_array * tr,unsigned long ip,void * data)8503 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8504 		     unsigned long ip, void *data)
8505 {
8506 	struct ftrace_func_mapper *mapper = data;
8507 
8508 	if (!ip) {
8509 		if (!mapper)
8510 			return;
8511 		free_ftrace_func_mapper(mapper, NULL);
8512 		return;
8513 	}
8514 
8515 	ftrace_func_mapper_remove_ip(mapper, ip);
8516 }
8517 
8518 static struct ftrace_probe_ops snapshot_probe_ops = {
8519 	.func			= ftrace_snapshot,
8520 	.print			= ftrace_snapshot_print,
8521 };
8522 
8523 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8524 	.func			= ftrace_count_snapshot,
8525 	.print			= ftrace_snapshot_print,
8526 	.init			= ftrace_snapshot_init,
8527 	.free			= ftrace_snapshot_free,
8528 };
8529 
8530 static int
ftrace_trace_snapshot_callback(struct trace_array * tr,struct ftrace_hash * hash,char * glob,char * cmd,char * param,int enable)8531 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8532 			       char *glob, char *cmd, char *param, int enable)
8533 {
8534 	struct ftrace_probe_ops *ops;
8535 	void *count = (void *)-1;
8536 	char *number;
8537 	int ret;
8538 
8539 	if (!tr)
8540 		return -ENODEV;
8541 
8542 	/* hash funcs only work with set_ftrace_filter */
8543 	if (!enable)
8544 		return -EINVAL;
8545 
8546 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8547 
8548 	if (glob[0] == '!') {
8549 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8550 		if (!ret)
8551 			tracing_disarm_snapshot(tr);
8552 
8553 		return ret;
8554 	}
8555 
8556 	if (!param)
8557 		goto out_reg;
8558 
8559 	number = strsep(&param, ":");
8560 
8561 	if (!strlen(number))
8562 		goto out_reg;
8563 
8564 	/*
8565 	 * We use the callback data field (which is a pointer)
8566 	 * as our counter.
8567 	 */
8568 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8569 	if (ret)
8570 		return ret;
8571 
8572  out_reg:
8573 	ret = tracing_arm_snapshot(tr);
8574 	if (ret < 0)
8575 		goto out;
8576 
8577 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8578 	if (ret < 0)
8579 		tracing_disarm_snapshot(tr);
8580  out:
8581 	return ret < 0 ? ret : 0;
8582 }
8583 
8584 static struct ftrace_func_command ftrace_snapshot_cmd = {
8585 	.name			= "snapshot",
8586 	.func			= ftrace_trace_snapshot_callback,
8587 };
8588 
register_snapshot_cmd(void)8589 static __init int register_snapshot_cmd(void)
8590 {
8591 	return register_ftrace_command(&ftrace_snapshot_cmd);
8592 }
8593 #else
register_snapshot_cmd(void)8594 static inline __init int register_snapshot_cmd(void) { return 0; }
8595 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8596 
tracing_get_dentry(struct trace_array * tr)8597 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8598 {
8599 	if (WARN_ON(!tr->dir))
8600 		return ERR_PTR(-ENODEV);
8601 
8602 	/* Top directory uses NULL as the parent */
8603 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8604 		return NULL;
8605 
8606 	/* All sub buffers have a descriptor */
8607 	return tr->dir;
8608 }
8609 
tracing_dentry_percpu(struct trace_array * tr,int cpu)8610 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8611 {
8612 	struct dentry *d_tracer;
8613 
8614 	if (tr->percpu_dir)
8615 		return tr->percpu_dir;
8616 
8617 	d_tracer = tracing_get_dentry(tr);
8618 	if (IS_ERR(d_tracer))
8619 		return NULL;
8620 
8621 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8622 
8623 	MEM_FAIL(!tr->percpu_dir,
8624 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8625 
8626 	return tr->percpu_dir;
8627 }
8628 
8629 static struct dentry *
trace_create_cpu_file(const char * name,umode_t mode,struct dentry * parent,void * data,long cpu,const struct file_operations * fops)8630 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8631 		      void *data, long cpu, const struct file_operations *fops)
8632 {
8633 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8634 
8635 	if (ret) /* See tracing_get_cpu() */
8636 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8637 	return ret;
8638 }
8639 
8640 static void
tracing_init_tracefs_percpu(struct trace_array * tr,long cpu)8641 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8642 {
8643 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8644 	struct dentry *d_cpu;
8645 	char cpu_dir[30]; /* 30 characters should be more than enough */
8646 
8647 	if (!d_percpu)
8648 		return;
8649 
8650 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8651 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8652 	if (!d_cpu) {
8653 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8654 		return;
8655 	}
8656 
8657 	/* per cpu trace_pipe */
8658 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8659 				tr, cpu, &tracing_pipe_fops);
8660 
8661 	/* per cpu trace */
8662 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8663 				tr, cpu, &tracing_fops);
8664 
8665 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8666 				tr, cpu, &tracing_buffers_fops);
8667 
8668 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8669 				tr, cpu, &tracing_stats_fops);
8670 
8671 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8672 				tr, cpu, &tracing_entries_fops);
8673 
8674 	if (tr->range_addr_start)
8675 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8676 				      tr, cpu, &tracing_buffer_meta_fops);
8677 #ifdef CONFIG_TRACER_SNAPSHOT
8678 	if (!tr->range_addr_start) {
8679 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8680 				      tr, cpu, &snapshot_fops);
8681 
8682 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8683 				      tr, cpu, &snapshot_raw_fops);
8684 	}
8685 #endif
8686 }
8687 
8688 #ifdef CONFIG_FTRACE_SELFTEST
8689 /* Let selftest have access to static functions in this file */
8690 #include "trace_selftest.c"
8691 #endif
8692 
8693 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8694 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8695 			loff_t *ppos)
8696 {
8697 	struct trace_option_dentry *topt = filp->private_data;
8698 	char *buf;
8699 
8700 	if (topt->flags->val & topt->opt->bit)
8701 		buf = "1\n";
8702 	else
8703 		buf = "0\n";
8704 
8705 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8706 }
8707 
8708 static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8709 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8710 			 loff_t *ppos)
8711 {
8712 	struct trace_option_dentry *topt = filp->private_data;
8713 	unsigned long val;
8714 	int ret;
8715 
8716 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8717 	if (ret)
8718 		return ret;
8719 
8720 	if (val != 0 && val != 1)
8721 		return -EINVAL;
8722 
8723 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8724 		mutex_lock(&trace_types_lock);
8725 		ret = __set_tracer_option(topt->tr, topt->flags,
8726 					  topt->opt, !val);
8727 		mutex_unlock(&trace_types_lock);
8728 		if (ret)
8729 			return ret;
8730 	}
8731 
8732 	*ppos += cnt;
8733 
8734 	return cnt;
8735 }
8736 
tracing_open_options(struct inode * inode,struct file * filp)8737 static int tracing_open_options(struct inode *inode, struct file *filp)
8738 {
8739 	struct trace_option_dentry *topt = inode->i_private;
8740 	int ret;
8741 
8742 	ret = tracing_check_open_get_tr(topt->tr);
8743 	if (ret)
8744 		return ret;
8745 
8746 	filp->private_data = inode->i_private;
8747 	return 0;
8748 }
8749 
tracing_release_options(struct inode * inode,struct file * file)8750 static int tracing_release_options(struct inode *inode, struct file *file)
8751 {
8752 	struct trace_option_dentry *topt = file->private_data;
8753 
8754 	trace_array_put(topt->tr);
8755 	return 0;
8756 }
8757 
8758 static const struct file_operations trace_options_fops = {
8759 	.open = tracing_open_options,
8760 	.read = trace_options_read,
8761 	.write = trace_options_write,
8762 	.llseek	= generic_file_llseek,
8763 	.release = tracing_release_options,
8764 };
8765 
8766 /*
8767  * In order to pass in both the trace_array descriptor as well as the index
8768  * to the flag that the trace option file represents, the trace_array
8769  * has a character array of trace_flags_index[], which holds the index
8770  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8771  * The address of this character array is passed to the flag option file
8772  * read/write callbacks.
8773  *
8774  * In order to extract both the index and the trace_array descriptor,
8775  * get_tr_index() uses the following algorithm.
8776  *
8777  *   idx = *ptr;
8778  *
8779  * As the pointer itself contains the address of the index (remember
8780  * index[1] == 1).
8781  *
8782  * Then to get the trace_array descriptor, by subtracting that index
8783  * from the ptr, we get to the start of the index itself.
8784  *
8785  *   ptr - idx == &index[0]
8786  *
8787  * Then a simple container_of() from that pointer gets us to the
8788  * trace_array descriptor.
8789  */
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)8790 static void get_tr_index(void *data, struct trace_array **ptr,
8791 			 unsigned int *pindex)
8792 {
8793 	*pindex = *(unsigned char *)data;
8794 
8795 	*ptr = container_of(data - *pindex, struct trace_array,
8796 			    trace_flags_index);
8797 }
8798 
8799 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8800 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8801 			loff_t *ppos)
8802 {
8803 	void *tr_index = filp->private_data;
8804 	struct trace_array *tr;
8805 	unsigned int index;
8806 	char *buf;
8807 
8808 	get_tr_index(tr_index, &tr, &index);
8809 
8810 	if (tr->trace_flags & (1 << index))
8811 		buf = "1\n";
8812 	else
8813 		buf = "0\n";
8814 
8815 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8816 }
8817 
8818 static ssize_t
trace_options_core_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8819 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8820 			 loff_t *ppos)
8821 {
8822 	void *tr_index = filp->private_data;
8823 	struct trace_array *tr;
8824 	unsigned int index;
8825 	unsigned long val;
8826 	int ret;
8827 
8828 	get_tr_index(tr_index, &tr, &index);
8829 
8830 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8831 	if (ret)
8832 		return ret;
8833 
8834 	if (val != 0 && val != 1)
8835 		return -EINVAL;
8836 
8837 	mutex_lock(&event_mutex);
8838 	mutex_lock(&trace_types_lock);
8839 	ret = set_tracer_flag(tr, 1 << index, val);
8840 	mutex_unlock(&trace_types_lock);
8841 	mutex_unlock(&event_mutex);
8842 
8843 	if (ret < 0)
8844 		return ret;
8845 
8846 	*ppos += cnt;
8847 
8848 	return cnt;
8849 }
8850 
8851 static const struct file_operations trace_options_core_fops = {
8852 	.open = tracing_open_generic,
8853 	.read = trace_options_core_read,
8854 	.write = trace_options_core_write,
8855 	.llseek = generic_file_llseek,
8856 };
8857 
trace_create_file(const char * name,umode_t mode,struct dentry * parent,void * data,const struct file_operations * fops)8858 struct dentry *trace_create_file(const char *name,
8859 				 umode_t mode,
8860 				 struct dentry *parent,
8861 				 void *data,
8862 				 const struct file_operations *fops)
8863 {
8864 	struct dentry *ret;
8865 
8866 	ret = tracefs_create_file(name, mode, parent, data, fops);
8867 	if (!ret)
8868 		pr_warn("Could not create tracefs '%s' entry\n", name);
8869 
8870 	return ret;
8871 }
8872 
8873 
trace_options_init_dentry(struct trace_array * tr)8874 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8875 {
8876 	struct dentry *d_tracer;
8877 
8878 	if (tr->options)
8879 		return tr->options;
8880 
8881 	d_tracer = tracing_get_dentry(tr);
8882 	if (IS_ERR(d_tracer))
8883 		return NULL;
8884 
8885 	tr->options = tracefs_create_dir("options", d_tracer);
8886 	if (!tr->options) {
8887 		pr_warn("Could not create tracefs directory 'options'\n");
8888 		return NULL;
8889 	}
8890 
8891 	return tr->options;
8892 }
8893 
8894 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)8895 create_trace_option_file(struct trace_array *tr,
8896 			 struct trace_option_dentry *topt,
8897 			 struct tracer_flags *flags,
8898 			 struct tracer_opt *opt)
8899 {
8900 	struct dentry *t_options;
8901 
8902 	t_options = trace_options_init_dentry(tr);
8903 	if (!t_options)
8904 		return;
8905 
8906 	topt->flags = flags;
8907 	topt->opt = opt;
8908 	topt->tr = tr;
8909 
8910 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8911 					t_options, topt, &trace_options_fops);
8912 
8913 }
8914 
8915 static void
create_trace_option_files(struct trace_array * tr,struct tracer * tracer)8916 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8917 {
8918 	struct trace_option_dentry *topts;
8919 	struct trace_options *tr_topts;
8920 	struct tracer_flags *flags;
8921 	struct tracer_opt *opts;
8922 	int cnt;
8923 	int i;
8924 
8925 	if (!tracer)
8926 		return;
8927 
8928 	flags = tracer->flags;
8929 
8930 	if (!flags || !flags->opts)
8931 		return;
8932 
8933 	/*
8934 	 * If this is an instance, only create flags for tracers
8935 	 * the instance may have.
8936 	 */
8937 	if (!trace_ok_for_array(tracer, tr))
8938 		return;
8939 
8940 	for (i = 0; i < tr->nr_topts; i++) {
8941 		/* Make sure there's no duplicate flags. */
8942 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8943 			return;
8944 	}
8945 
8946 	opts = flags->opts;
8947 
8948 	for (cnt = 0; opts[cnt].name; cnt++)
8949 		;
8950 
8951 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8952 	if (!topts)
8953 		return;
8954 
8955 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
8956 			    GFP_KERNEL);
8957 	if (!tr_topts) {
8958 		kfree(topts);
8959 		return;
8960 	}
8961 
8962 	tr->topts = tr_topts;
8963 	tr->topts[tr->nr_topts].tracer = tracer;
8964 	tr->topts[tr->nr_topts].topts = topts;
8965 	tr->nr_topts++;
8966 
8967 	for (cnt = 0; opts[cnt].name; cnt++) {
8968 		create_trace_option_file(tr, &topts[cnt], flags,
8969 					 &opts[cnt]);
8970 		MEM_FAIL(topts[cnt].entry == NULL,
8971 			  "Failed to create trace option: %s",
8972 			  opts[cnt].name);
8973 	}
8974 }
8975 
8976 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)8977 create_trace_option_core_file(struct trace_array *tr,
8978 			      const char *option, long index)
8979 {
8980 	struct dentry *t_options;
8981 
8982 	t_options = trace_options_init_dentry(tr);
8983 	if (!t_options)
8984 		return NULL;
8985 
8986 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
8987 				 (void *)&tr->trace_flags_index[index],
8988 				 &trace_options_core_fops);
8989 }
8990 
create_trace_options_dir(struct trace_array * tr)8991 static void create_trace_options_dir(struct trace_array *tr)
8992 {
8993 	struct dentry *t_options;
8994 	bool top_level = tr == &global_trace;
8995 	int i;
8996 
8997 	t_options = trace_options_init_dentry(tr);
8998 	if (!t_options)
8999 		return;
9000 
9001 	for (i = 0; trace_options[i]; i++) {
9002 		if (top_level ||
9003 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9004 			create_trace_option_core_file(tr, trace_options[i], i);
9005 	}
9006 }
9007 
9008 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9009 rb_simple_read(struct file *filp, char __user *ubuf,
9010 	       size_t cnt, loff_t *ppos)
9011 {
9012 	struct trace_array *tr = filp->private_data;
9013 	char buf[64];
9014 	int r;
9015 
9016 	r = tracer_tracing_is_on(tr);
9017 	r = sprintf(buf, "%d\n", r);
9018 
9019 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9020 }
9021 
9022 static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9023 rb_simple_write(struct file *filp, const char __user *ubuf,
9024 		size_t cnt, loff_t *ppos)
9025 {
9026 	struct trace_array *tr = filp->private_data;
9027 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9028 	unsigned long val;
9029 	int ret;
9030 
9031 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9032 	if (ret)
9033 		return ret;
9034 
9035 	if (buffer) {
9036 		mutex_lock(&trace_types_lock);
9037 		if (!!val == tracer_tracing_is_on(tr)) {
9038 			val = 0; /* do nothing */
9039 		} else if (val) {
9040 			tracer_tracing_on(tr);
9041 			if (tr->current_trace->start)
9042 				tr->current_trace->start(tr);
9043 		} else {
9044 			tracer_tracing_off(tr);
9045 			if (tr->current_trace->stop)
9046 				tr->current_trace->stop(tr);
9047 			/* Wake up any waiters */
9048 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9049 		}
9050 		mutex_unlock(&trace_types_lock);
9051 	}
9052 
9053 	(*ppos)++;
9054 
9055 	return cnt;
9056 }
9057 
9058 static const struct file_operations rb_simple_fops = {
9059 	.open		= tracing_open_generic_tr,
9060 	.read		= rb_simple_read,
9061 	.write		= rb_simple_write,
9062 	.release	= tracing_release_generic_tr,
9063 	.llseek		= default_llseek,
9064 };
9065 
9066 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9067 buffer_percent_read(struct file *filp, char __user *ubuf,
9068 		    size_t cnt, loff_t *ppos)
9069 {
9070 	struct trace_array *tr = filp->private_data;
9071 	char buf[64];
9072 	int r;
9073 
9074 	r = tr->buffer_percent;
9075 	r = sprintf(buf, "%d\n", r);
9076 
9077 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9078 }
9079 
9080 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9081 buffer_percent_write(struct file *filp, const char __user *ubuf,
9082 		     size_t cnt, loff_t *ppos)
9083 {
9084 	struct trace_array *tr = filp->private_data;
9085 	unsigned long val;
9086 	int ret;
9087 
9088 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9089 	if (ret)
9090 		return ret;
9091 
9092 	if (val > 100)
9093 		return -EINVAL;
9094 
9095 	tr->buffer_percent = val;
9096 
9097 	(*ppos)++;
9098 
9099 	return cnt;
9100 }
9101 
9102 static const struct file_operations buffer_percent_fops = {
9103 	.open		= tracing_open_generic_tr,
9104 	.read		= buffer_percent_read,
9105 	.write		= buffer_percent_write,
9106 	.release	= tracing_release_generic_tr,
9107 	.llseek		= default_llseek,
9108 };
9109 
9110 static ssize_t
buffer_subbuf_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9111 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9112 {
9113 	struct trace_array *tr = filp->private_data;
9114 	size_t size;
9115 	char buf[64];
9116 	int order;
9117 	int r;
9118 
9119 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9120 	size = (PAGE_SIZE << order) / 1024;
9121 
9122 	r = sprintf(buf, "%zd\n", size);
9123 
9124 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9125 }
9126 
9127 static ssize_t
buffer_subbuf_size_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9128 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9129 			 size_t cnt, loff_t *ppos)
9130 {
9131 	struct trace_array *tr = filp->private_data;
9132 	unsigned long val;
9133 	int old_order;
9134 	int order;
9135 	int pages;
9136 	int ret;
9137 
9138 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9139 	if (ret)
9140 		return ret;
9141 
9142 	val *= 1024; /* value passed in is in KB */
9143 
9144 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9145 	order = fls(pages - 1);
9146 
9147 	/* limit between 1 and 128 system pages */
9148 	if (order < 0 || order > 7)
9149 		return -EINVAL;
9150 
9151 	/* Do not allow tracing while changing the order of the ring buffer */
9152 	tracing_stop_tr(tr);
9153 
9154 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9155 	if (old_order == order)
9156 		goto out;
9157 
9158 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9159 	if (ret)
9160 		goto out;
9161 
9162 #ifdef CONFIG_TRACER_MAX_TRACE
9163 
9164 	if (!tr->allocated_snapshot)
9165 		goto out_max;
9166 
9167 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9168 	if (ret) {
9169 		/* Put back the old order */
9170 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9171 		if (WARN_ON_ONCE(cnt)) {
9172 			/*
9173 			 * AARGH! We are left with different orders!
9174 			 * The max buffer is our "snapshot" buffer.
9175 			 * When a tracer needs a snapshot (one of the
9176 			 * latency tracers), it swaps the max buffer
9177 			 * with the saved snap shot. We succeeded to
9178 			 * update the order of the main buffer, but failed to
9179 			 * update the order of the max buffer. But when we tried
9180 			 * to reset the main buffer to the original size, we
9181 			 * failed there too. This is very unlikely to
9182 			 * happen, but if it does, warn and kill all
9183 			 * tracing.
9184 			 */
9185 			tracing_disabled = 1;
9186 		}
9187 		goto out;
9188 	}
9189  out_max:
9190 #endif
9191 	(*ppos)++;
9192  out:
9193 	if (ret)
9194 		cnt = ret;
9195 	tracing_start_tr(tr);
9196 	return cnt;
9197 }
9198 
9199 static const struct file_operations buffer_subbuf_size_fops = {
9200 	.open		= tracing_open_generic_tr,
9201 	.read		= buffer_subbuf_size_read,
9202 	.write		= buffer_subbuf_size_write,
9203 	.release	= tracing_release_generic_tr,
9204 	.llseek		= default_llseek,
9205 };
9206 
9207 static struct dentry *trace_instance_dir;
9208 
9209 static void
9210 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9211 
9212 static int
allocate_trace_buffer(struct trace_array * tr,struct array_buffer * buf,int size)9213 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9214 {
9215 	enum ring_buffer_flags rb_flags;
9216 
9217 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9218 
9219 	buf->tr = tr;
9220 
9221 	if (tr->range_addr_start && tr->range_addr_size) {
9222 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9223 						      tr->range_addr_start,
9224 						      tr->range_addr_size);
9225 
9226 		ring_buffer_last_boot_delta(buf->buffer,
9227 					    &tr->text_delta, &tr->data_delta);
9228 		/*
9229 		 * This is basically the same as a mapped buffer,
9230 		 * with the same restrictions.
9231 		 */
9232 		tr->mapped++;
9233 	} else {
9234 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9235 	}
9236 	if (!buf->buffer)
9237 		return -ENOMEM;
9238 
9239 	buf->data = alloc_percpu(struct trace_array_cpu);
9240 	if (!buf->data) {
9241 		ring_buffer_free(buf->buffer);
9242 		buf->buffer = NULL;
9243 		return -ENOMEM;
9244 	}
9245 
9246 	/* Allocate the first page for all buffers */
9247 	set_buffer_entries(&tr->array_buffer,
9248 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9249 
9250 	return 0;
9251 }
9252 
free_trace_buffer(struct array_buffer * buf)9253 static void free_trace_buffer(struct array_buffer *buf)
9254 {
9255 	if (buf->buffer) {
9256 		ring_buffer_free(buf->buffer);
9257 		buf->buffer = NULL;
9258 		free_percpu(buf->data);
9259 		buf->data = NULL;
9260 	}
9261 }
9262 
allocate_trace_buffers(struct trace_array * tr,int size)9263 static int allocate_trace_buffers(struct trace_array *tr, int size)
9264 {
9265 	int ret;
9266 
9267 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9268 	if (ret)
9269 		return ret;
9270 
9271 #ifdef CONFIG_TRACER_MAX_TRACE
9272 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
9273 	if (tr->range_addr_start)
9274 		return 0;
9275 
9276 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9277 				    allocate_snapshot ? size : 1);
9278 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9279 		free_trace_buffer(&tr->array_buffer);
9280 		return -ENOMEM;
9281 	}
9282 	tr->allocated_snapshot = allocate_snapshot;
9283 
9284 	allocate_snapshot = false;
9285 #endif
9286 
9287 	return 0;
9288 }
9289 
free_trace_buffers(struct trace_array * tr)9290 static void free_trace_buffers(struct trace_array *tr)
9291 {
9292 	if (!tr)
9293 		return;
9294 
9295 	free_trace_buffer(&tr->array_buffer);
9296 
9297 #ifdef CONFIG_TRACER_MAX_TRACE
9298 	free_trace_buffer(&tr->max_buffer);
9299 #endif
9300 }
9301 
init_trace_flags_index(struct trace_array * tr)9302 static void init_trace_flags_index(struct trace_array *tr)
9303 {
9304 	int i;
9305 
9306 	/* Used by the trace options files */
9307 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9308 		tr->trace_flags_index[i] = i;
9309 }
9310 
__update_tracer_options(struct trace_array * tr)9311 static void __update_tracer_options(struct trace_array *tr)
9312 {
9313 	struct tracer *t;
9314 
9315 	for (t = trace_types; t; t = t->next)
9316 		add_tracer_options(tr, t);
9317 }
9318 
update_tracer_options(struct trace_array * tr)9319 static void update_tracer_options(struct trace_array *tr)
9320 {
9321 	mutex_lock(&trace_types_lock);
9322 	tracer_options_updated = true;
9323 	__update_tracer_options(tr);
9324 	mutex_unlock(&trace_types_lock);
9325 }
9326 
9327 /* Must have trace_types_lock held */
trace_array_find(const char * instance)9328 struct trace_array *trace_array_find(const char *instance)
9329 {
9330 	struct trace_array *tr, *found = NULL;
9331 
9332 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9333 		if (tr->name && strcmp(tr->name, instance) == 0) {
9334 			found = tr;
9335 			break;
9336 		}
9337 	}
9338 
9339 	return found;
9340 }
9341 
trace_array_find_get(const char * instance)9342 struct trace_array *trace_array_find_get(const char *instance)
9343 {
9344 	struct trace_array *tr;
9345 
9346 	mutex_lock(&trace_types_lock);
9347 	tr = trace_array_find(instance);
9348 	if (tr)
9349 		tr->ref++;
9350 	mutex_unlock(&trace_types_lock);
9351 
9352 	return tr;
9353 }
9354 
trace_array_create_dir(struct trace_array * tr)9355 static int trace_array_create_dir(struct trace_array *tr)
9356 {
9357 	int ret;
9358 
9359 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9360 	if (!tr->dir)
9361 		return -EINVAL;
9362 
9363 	ret = event_trace_add_tracer(tr->dir, tr);
9364 	if (ret) {
9365 		tracefs_remove(tr->dir);
9366 		return ret;
9367 	}
9368 
9369 	init_tracer_tracefs(tr, tr->dir);
9370 	__update_tracer_options(tr);
9371 
9372 	return ret;
9373 }
9374 
9375 static struct trace_array *
trace_array_create_systems(const char * name,const char * systems,unsigned long range_addr_start,unsigned long range_addr_size)9376 trace_array_create_systems(const char *name, const char *systems,
9377 			   unsigned long range_addr_start,
9378 			   unsigned long range_addr_size)
9379 {
9380 	struct trace_array *tr;
9381 	int ret;
9382 
9383 	ret = -ENOMEM;
9384 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9385 	if (!tr)
9386 		return ERR_PTR(ret);
9387 
9388 	tr->name = kstrdup(name, GFP_KERNEL);
9389 	if (!tr->name)
9390 		goto out_free_tr;
9391 
9392 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9393 		goto out_free_tr;
9394 
9395 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9396 		goto out_free_tr;
9397 
9398 	if (systems) {
9399 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9400 		if (!tr->system_names)
9401 			goto out_free_tr;
9402 	}
9403 
9404 	/* Only for boot up memory mapped ring buffers */
9405 	tr->range_addr_start = range_addr_start;
9406 	tr->range_addr_size = range_addr_size;
9407 
9408 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9409 
9410 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9411 
9412 	raw_spin_lock_init(&tr->start_lock);
9413 
9414 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9415 #ifdef CONFIG_TRACER_MAX_TRACE
9416 	spin_lock_init(&tr->snapshot_trigger_lock);
9417 #endif
9418 	tr->current_trace = &nop_trace;
9419 
9420 	INIT_LIST_HEAD(&tr->systems);
9421 	INIT_LIST_HEAD(&tr->events);
9422 	INIT_LIST_HEAD(&tr->hist_vars);
9423 	INIT_LIST_HEAD(&tr->err_log);
9424 
9425 #ifdef CONFIG_MODULES
9426 	INIT_LIST_HEAD(&tr->mod_events);
9427 #endif
9428 
9429 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9430 		goto out_free_tr;
9431 
9432 	/* The ring buffer is defaultly expanded */
9433 	trace_set_ring_buffer_expanded(tr);
9434 
9435 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9436 		goto out_free_tr;
9437 
9438 	ftrace_init_trace_array(tr);
9439 
9440 	init_trace_flags_index(tr);
9441 
9442 	if (trace_instance_dir) {
9443 		ret = trace_array_create_dir(tr);
9444 		if (ret)
9445 			goto out_free_tr;
9446 	} else
9447 		__trace_early_add_events(tr);
9448 
9449 	list_add(&tr->list, &ftrace_trace_arrays);
9450 
9451 	tr->ref++;
9452 
9453 	return tr;
9454 
9455  out_free_tr:
9456 	ftrace_free_ftrace_ops(tr);
9457 	free_trace_buffers(tr);
9458 	free_cpumask_var(tr->pipe_cpumask);
9459 	free_cpumask_var(tr->tracing_cpumask);
9460 	kfree_const(tr->system_names);
9461 	kfree(tr->name);
9462 	kfree(tr);
9463 
9464 	return ERR_PTR(ret);
9465 }
9466 
trace_array_create(const char * name)9467 static struct trace_array *trace_array_create(const char *name)
9468 {
9469 	return trace_array_create_systems(name, NULL, 0, 0);
9470 }
9471 
instance_mkdir(const char * name)9472 static int instance_mkdir(const char *name)
9473 {
9474 	struct trace_array *tr;
9475 	int ret;
9476 
9477 	guard(mutex)(&event_mutex);
9478 	guard(mutex)(&trace_types_lock);
9479 
9480 	ret = -EEXIST;
9481 	if (trace_array_find(name))
9482 		return -EEXIST;
9483 
9484 	tr = trace_array_create(name);
9485 
9486 	ret = PTR_ERR_OR_ZERO(tr);
9487 
9488 	return ret;
9489 }
9490 
map_pages(u64 start,u64 size)9491 static u64 map_pages(u64 start, u64 size)
9492 {
9493 	struct page **pages;
9494 	phys_addr_t page_start;
9495 	unsigned int page_count;
9496 	unsigned int i;
9497 	void *vaddr;
9498 
9499 	page_count = DIV_ROUND_UP(size, PAGE_SIZE);
9500 
9501 	page_start = start;
9502 	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
9503 	if (!pages)
9504 		return 0;
9505 
9506 	for (i = 0; i < page_count; i++) {
9507 		phys_addr_t addr = page_start + i * PAGE_SIZE;
9508 		pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
9509 	}
9510 	vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
9511 	kfree(pages);
9512 
9513 	return (u64)(unsigned long)vaddr;
9514 }
9515 
9516 /**
9517  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9518  * @name: The name of the trace array to be looked up/created.
9519  * @systems: A list of systems to create event directories for (NULL for all)
9520  *
9521  * Returns pointer to trace array with given name.
9522  * NULL, if it cannot be created.
9523  *
9524  * NOTE: This function increments the reference counter associated with the
9525  * trace array returned. This makes sure it cannot be freed while in use.
9526  * Use trace_array_put() once the trace array is no longer needed.
9527  * If the trace_array is to be freed, trace_array_destroy() needs to
9528  * be called after the trace_array_put(), or simply let user space delete
9529  * it from the tracefs instances directory. But until the
9530  * trace_array_put() is called, user space can not delete it.
9531  *
9532  */
trace_array_get_by_name(const char * name,const char * systems)9533 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9534 {
9535 	struct trace_array *tr;
9536 
9537 	guard(mutex)(&event_mutex);
9538 	guard(mutex)(&trace_types_lock);
9539 
9540 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9541 		if (tr->name && strcmp(tr->name, name) == 0) {
9542 			tr->ref++;
9543 			return tr;
9544 		}
9545 	}
9546 
9547 	tr = trace_array_create_systems(name, systems, 0, 0);
9548 
9549 	if (IS_ERR(tr))
9550 		tr = NULL;
9551 	else
9552 		tr->ref++;
9553 
9554 	return tr;
9555 }
9556 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9557 
__remove_instance(struct trace_array * tr)9558 static int __remove_instance(struct trace_array *tr)
9559 {
9560 	int i;
9561 
9562 	/* Reference counter for a newly created trace array = 1. */
9563 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9564 		return -EBUSY;
9565 
9566 	list_del(&tr->list);
9567 
9568 	/* Disable all the flags that were enabled coming in */
9569 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9570 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9571 			set_tracer_flag(tr, 1 << i, 0);
9572 	}
9573 
9574 	if (printk_trace == tr)
9575 		update_printk_trace(&global_trace);
9576 
9577 	tracing_set_nop(tr);
9578 	clear_ftrace_function_probes(tr);
9579 	event_trace_del_tracer(tr);
9580 	ftrace_clear_pids(tr);
9581 	ftrace_destroy_function_files(tr);
9582 	tracefs_remove(tr->dir);
9583 	free_percpu(tr->last_func_repeats);
9584 	free_trace_buffers(tr);
9585 	clear_tracing_err_log(tr);
9586 
9587 	for (i = 0; i < tr->nr_topts; i++) {
9588 		kfree(tr->topts[i].topts);
9589 	}
9590 	kfree(tr->topts);
9591 
9592 	free_cpumask_var(tr->pipe_cpumask);
9593 	free_cpumask_var(tr->tracing_cpumask);
9594 	kfree_const(tr->system_names);
9595 	kfree(tr->name);
9596 	kfree(tr);
9597 
9598 	return 0;
9599 }
9600 
trace_array_destroy(struct trace_array * this_tr)9601 int trace_array_destroy(struct trace_array *this_tr)
9602 {
9603 	struct trace_array *tr;
9604 
9605 	if (!this_tr)
9606 		return -EINVAL;
9607 
9608 	guard(mutex)(&event_mutex);
9609 	guard(mutex)(&trace_types_lock);
9610 
9611 
9612 	/* Making sure trace array exists before destroying it. */
9613 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9614 		if (tr == this_tr)
9615 			return __remove_instance(tr);
9616 	}
9617 
9618 	return -ENODEV;
9619 }
9620 EXPORT_SYMBOL_GPL(trace_array_destroy);
9621 
instance_rmdir(const char * name)9622 static int instance_rmdir(const char *name)
9623 {
9624 	struct trace_array *tr;
9625 
9626 	guard(mutex)(&event_mutex);
9627 	guard(mutex)(&trace_types_lock);
9628 
9629 	tr = trace_array_find(name);
9630 	if (!tr)
9631 		return -ENODEV;
9632 
9633 	return __remove_instance(tr);
9634 }
9635 
create_trace_instances(struct dentry * d_tracer)9636 static __init void create_trace_instances(struct dentry *d_tracer)
9637 {
9638 	struct trace_array *tr;
9639 
9640 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9641 							 instance_mkdir,
9642 							 instance_rmdir);
9643 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9644 		return;
9645 
9646 	guard(mutex)(&event_mutex);
9647 	guard(mutex)(&trace_types_lock);
9648 
9649 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9650 		if (!tr->name)
9651 			continue;
9652 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9653 			     "Failed to create instance directory\n"))
9654 			return;
9655 	}
9656 }
9657 
9658 static void
init_tracer_tracefs(struct trace_array * tr,struct dentry * d_tracer)9659 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9660 {
9661 	int cpu;
9662 
9663 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9664 			tr, &show_traces_fops);
9665 
9666 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9667 			tr, &set_tracer_fops);
9668 
9669 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9670 			  tr, &tracing_cpumask_fops);
9671 
9672 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9673 			  tr, &tracing_iter_fops);
9674 
9675 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9676 			  tr, &tracing_fops);
9677 
9678 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9679 			  tr, &tracing_pipe_fops);
9680 
9681 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9682 			  tr, &tracing_entries_fops);
9683 
9684 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9685 			  tr, &tracing_total_entries_fops);
9686 
9687 	trace_create_file("free_buffer", 0200, d_tracer,
9688 			  tr, &tracing_free_buffer_fops);
9689 
9690 	trace_create_file("trace_marker", 0220, d_tracer,
9691 			  tr, &tracing_mark_fops);
9692 
9693 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9694 
9695 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9696 			  tr, &tracing_mark_raw_fops);
9697 
9698 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9699 			  &trace_clock_fops);
9700 
9701 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9702 			  tr, &rb_simple_fops);
9703 
9704 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9705 			  &trace_time_stamp_mode_fops);
9706 
9707 	tr->buffer_percent = 50;
9708 
9709 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9710 			tr, &buffer_percent_fops);
9711 
9712 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9713 			  tr, &buffer_subbuf_size_fops);
9714 
9715 	create_trace_options_dir(tr);
9716 
9717 #ifdef CONFIG_TRACER_MAX_TRACE
9718 	trace_create_maxlat_file(tr, d_tracer);
9719 #endif
9720 
9721 	if (ftrace_create_function_files(tr, d_tracer))
9722 		MEM_FAIL(1, "Could not allocate function filter files");
9723 
9724 	if (tr->range_addr_start) {
9725 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
9726 				  tr, &last_boot_fops);
9727 #ifdef CONFIG_TRACER_SNAPSHOT
9728 	} else {
9729 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9730 				  tr, &snapshot_fops);
9731 #endif
9732 	}
9733 
9734 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9735 			  tr, &tracing_err_log_fops);
9736 
9737 	for_each_tracing_cpu(cpu)
9738 		tracing_init_tracefs_percpu(tr, cpu);
9739 
9740 	ftrace_init_tracefs(tr, d_tracer);
9741 }
9742 
trace_automount(struct dentry * mntpt,void * ingore)9743 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9744 {
9745 	struct vfsmount *mnt;
9746 	struct file_system_type *type;
9747 
9748 	/*
9749 	 * To maintain backward compatibility for tools that mount
9750 	 * debugfs to get to the tracing facility, tracefs is automatically
9751 	 * mounted to the debugfs/tracing directory.
9752 	 */
9753 	type = get_fs_type("tracefs");
9754 	if (!type)
9755 		return NULL;
9756 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9757 	put_filesystem(type);
9758 	if (IS_ERR(mnt))
9759 		return NULL;
9760 	mntget(mnt);
9761 
9762 	return mnt;
9763 }
9764 
9765 /**
9766  * tracing_init_dentry - initialize top level trace array
9767  *
9768  * This is called when creating files or directories in the tracing
9769  * directory. It is called via fs_initcall() by any of the boot up code
9770  * and expects to return the dentry of the top level tracing directory.
9771  */
tracing_init_dentry(void)9772 int tracing_init_dentry(void)
9773 {
9774 	struct trace_array *tr = &global_trace;
9775 
9776 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9777 		pr_warn("Tracing disabled due to lockdown\n");
9778 		return -EPERM;
9779 	}
9780 
9781 	/* The top level trace array uses  NULL as parent */
9782 	if (tr->dir)
9783 		return 0;
9784 
9785 	if (WARN_ON(!tracefs_initialized()))
9786 		return -ENODEV;
9787 
9788 	/*
9789 	 * As there may still be users that expect the tracing
9790 	 * files to exist in debugfs/tracing, we must automount
9791 	 * the tracefs file system there, so older tools still
9792 	 * work with the newer kernel.
9793 	 */
9794 	tr->dir = debugfs_create_automount("tracing", NULL,
9795 					   trace_automount, NULL);
9796 
9797 	return 0;
9798 }
9799 
9800 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9801 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9802 
9803 static struct workqueue_struct *eval_map_wq __initdata;
9804 static struct work_struct eval_map_work __initdata;
9805 static struct work_struct tracerfs_init_work __initdata;
9806 
eval_map_work_func(struct work_struct * work)9807 static void __init eval_map_work_func(struct work_struct *work)
9808 {
9809 	int len;
9810 
9811 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9812 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9813 }
9814 
trace_eval_init(void)9815 static int __init trace_eval_init(void)
9816 {
9817 	INIT_WORK(&eval_map_work, eval_map_work_func);
9818 
9819 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9820 	if (!eval_map_wq) {
9821 		pr_err("Unable to allocate eval_map_wq\n");
9822 		/* Do work here */
9823 		eval_map_work_func(&eval_map_work);
9824 		return -ENOMEM;
9825 	}
9826 
9827 	queue_work(eval_map_wq, &eval_map_work);
9828 	return 0;
9829 }
9830 
9831 subsys_initcall(trace_eval_init);
9832 
trace_eval_sync(void)9833 static int __init trace_eval_sync(void)
9834 {
9835 	/* Make sure the eval map updates are finished */
9836 	if (eval_map_wq)
9837 		destroy_workqueue(eval_map_wq);
9838 	return 0;
9839 }
9840 
9841 late_initcall_sync(trace_eval_sync);
9842 
9843 
9844 #ifdef CONFIG_MODULES
9845 
module_exists(const char * module)9846 bool module_exists(const char *module)
9847 {
9848 	/* All modules have the symbol __this_module */
9849 	static const char this_mod[] = "__this_module";
9850 	char modname[MAX_PARAM_PREFIX_LEN + sizeof(this_mod) + 2];
9851 	unsigned long val;
9852 	int n;
9853 
9854 	n = snprintf(modname, sizeof(modname), "%s:%s", module, this_mod);
9855 
9856 	if (n > sizeof(modname) - 1)
9857 		return false;
9858 
9859 	val = module_kallsyms_lookup_name(modname);
9860 	return val != 0;
9861 }
9862 
trace_module_add_evals(struct module * mod)9863 static void trace_module_add_evals(struct module *mod)
9864 {
9865 	if (!mod->num_trace_evals)
9866 		return;
9867 
9868 	/*
9869 	 * Modules with bad taint do not have events created, do
9870 	 * not bother with enums either.
9871 	 */
9872 	if (trace_module_has_bad_taint(mod))
9873 		return;
9874 
9875 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9876 }
9877 
9878 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
trace_module_remove_evals(struct module * mod)9879 static void trace_module_remove_evals(struct module *mod)
9880 {
9881 	union trace_eval_map_item *map;
9882 	union trace_eval_map_item **last = &trace_eval_maps;
9883 
9884 	if (!mod->num_trace_evals)
9885 		return;
9886 
9887 	guard(mutex)(&trace_eval_mutex);
9888 
9889 	map = trace_eval_maps;
9890 
9891 	while (map) {
9892 		if (map->head.mod == mod)
9893 			break;
9894 		map = trace_eval_jmp_to_tail(map);
9895 		last = &map->tail.next;
9896 		map = map->tail.next;
9897 	}
9898 	if (!map)
9899 		return;
9900 
9901 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9902 	kfree(map);
9903 }
9904 #else
trace_module_remove_evals(struct module * mod)9905 static inline void trace_module_remove_evals(struct module *mod) { }
9906 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9907 
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)9908 static int trace_module_notify(struct notifier_block *self,
9909 			       unsigned long val, void *data)
9910 {
9911 	struct module *mod = data;
9912 
9913 	switch (val) {
9914 	case MODULE_STATE_COMING:
9915 		trace_module_add_evals(mod);
9916 		break;
9917 	case MODULE_STATE_GOING:
9918 		trace_module_remove_evals(mod);
9919 		break;
9920 	}
9921 
9922 	return NOTIFY_OK;
9923 }
9924 
9925 static struct notifier_block trace_module_nb = {
9926 	.notifier_call = trace_module_notify,
9927 	.priority = 0,
9928 };
9929 #endif /* CONFIG_MODULES */
9930 
tracer_init_tracefs_work_func(struct work_struct * work)9931 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9932 {
9933 
9934 	event_trace_init();
9935 
9936 	init_tracer_tracefs(&global_trace, NULL);
9937 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9938 
9939 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9940 			&global_trace, &tracing_thresh_fops);
9941 
9942 	trace_create_file("README", TRACE_MODE_READ, NULL,
9943 			NULL, &tracing_readme_fops);
9944 
9945 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9946 			NULL, &tracing_saved_cmdlines_fops);
9947 
9948 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9949 			  NULL, &tracing_saved_cmdlines_size_fops);
9950 
9951 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9952 			NULL, &tracing_saved_tgids_fops);
9953 
9954 	trace_create_eval_file(NULL);
9955 
9956 #ifdef CONFIG_MODULES
9957 	register_module_notifier(&trace_module_nb);
9958 #endif
9959 
9960 #ifdef CONFIG_DYNAMIC_FTRACE
9961 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
9962 			NULL, &tracing_dyn_info_fops);
9963 #endif
9964 
9965 	create_trace_instances(NULL);
9966 
9967 	update_tracer_options(&global_trace);
9968 }
9969 
tracer_init_tracefs(void)9970 static __init int tracer_init_tracefs(void)
9971 {
9972 	int ret;
9973 
9974 	trace_access_lock_init();
9975 
9976 	ret = tracing_init_dentry();
9977 	if (ret)
9978 		return 0;
9979 
9980 	if (eval_map_wq) {
9981 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
9982 		queue_work(eval_map_wq, &tracerfs_init_work);
9983 	} else {
9984 		tracer_init_tracefs_work_func(NULL);
9985 	}
9986 
9987 	rv_init_interface();
9988 
9989 	return 0;
9990 }
9991 
9992 fs_initcall(tracer_init_tracefs);
9993 
9994 static int trace_die_panic_handler(struct notifier_block *self,
9995 				unsigned long ev, void *unused);
9996 
9997 static struct notifier_block trace_panic_notifier = {
9998 	.notifier_call = trace_die_panic_handler,
9999 	.priority = INT_MAX - 1,
10000 };
10001 
10002 static struct notifier_block trace_die_notifier = {
10003 	.notifier_call = trace_die_panic_handler,
10004 	.priority = INT_MAX - 1,
10005 };
10006 
10007 /*
10008  * The idea is to execute the following die/panic callback early, in order
10009  * to avoid showing irrelevant information in the trace (like other panic
10010  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10011  * warnings get disabled (to prevent potential log flooding).
10012  */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)10013 static int trace_die_panic_handler(struct notifier_block *self,
10014 				unsigned long ev, void *unused)
10015 {
10016 	if (!ftrace_dump_on_oops_enabled())
10017 		return NOTIFY_DONE;
10018 
10019 	/* The die notifier requires DIE_OOPS to trigger */
10020 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10021 		return NOTIFY_DONE;
10022 
10023 	ftrace_dump(DUMP_PARAM);
10024 
10025 	return NOTIFY_DONE;
10026 }
10027 
10028 /*
10029  * printk is set to max of 1024, we really don't need it that big.
10030  * Nothing should be printing 1000 characters anyway.
10031  */
10032 #define TRACE_MAX_PRINT		1000
10033 
10034 /*
10035  * Define here KERN_TRACE so that we have one place to modify
10036  * it if we decide to change what log level the ftrace dump
10037  * should be at.
10038  */
10039 #define KERN_TRACE		KERN_EMERG
10040 
10041 void
trace_printk_seq(struct trace_seq * s)10042 trace_printk_seq(struct trace_seq *s)
10043 {
10044 	/* Probably should print a warning here. */
10045 	if (s->seq.len >= TRACE_MAX_PRINT)
10046 		s->seq.len = TRACE_MAX_PRINT;
10047 
10048 	/*
10049 	 * More paranoid code. Although the buffer size is set to
10050 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10051 	 * an extra layer of protection.
10052 	 */
10053 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10054 		s->seq.len = s->seq.size - 1;
10055 
10056 	/* should be zero ended, but we are paranoid. */
10057 	s->buffer[s->seq.len] = 0;
10058 
10059 	printk(KERN_TRACE "%s", s->buffer);
10060 
10061 	trace_seq_init(s);
10062 }
10063 
trace_init_iter(struct trace_iterator * iter,struct trace_array * tr)10064 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10065 {
10066 	iter->tr = tr;
10067 	iter->trace = iter->tr->current_trace;
10068 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10069 	iter->array_buffer = &tr->array_buffer;
10070 
10071 	if (iter->trace && iter->trace->open)
10072 		iter->trace->open(iter);
10073 
10074 	/* Annotate start of buffers if we had overruns */
10075 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10076 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10077 
10078 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10079 	if (trace_clocks[iter->tr->clock_id].in_ns)
10080 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10081 
10082 	/* Can not use kmalloc for iter.temp and iter.fmt */
10083 	iter->temp = static_temp_buf;
10084 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10085 	iter->fmt = static_fmt_buf;
10086 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10087 }
10088 
trace_init_global_iter(struct trace_iterator * iter)10089 void trace_init_global_iter(struct trace_iterator *iter)
10090 {
10091 	trace_init_iter(iter, &global_trace);
10092 }
10093 
ftrace_dump_one(struct trace_array * tr,enum ftrace_dump_mode dump_mode)10094 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10095 {
10096 	/* use static because iter can be a bit big for the stack */
10097 	static struct trace_iterator iter;
10098 	unsigned int old_userobj;
10099 	unsigned long flags;
10100 	int cnt = 0, cpu;
10101 
10102 	/*
10103 	 * Always turn off tracing when we dump.
10104 	 * We don't need to show trace output of what happens
10105 	 * between multiple crashes.
10106 	 *
10107 	 * If the user does a sysrq-z, then they can re-enable
10108 	 * tracing with echo 1 > tracing_on.
10109 	 */
10110 	tracer_tracing_off(tr);
10111 
10112 	local_irq_save(flags);
10113 
10114 	/* Simulate the iterator */
10115 	trace_init_iter(&iter, tr);
10116 
10117 	for_each_tracing_cpu(cpu) {
10118 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10119 	}
10120 
10121 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10122 
10123 	/* don't look at user memory in panic mode */
10124 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10125 
10126 	if (dump_mode == DUMP_ORIG)
10127 		iter.cpu_file = raw_smp_processor_id();
10128 	else
10129 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10130 
10131 	if (tr == &global_trace)
10132 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10133 	else
10134 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10135 
10136 	/* Did function tracer already get disabled? */
10137 	if (ftrace_is_dead()) {
10138 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10139 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10140 	}
10141 
10142 	/*
10143 	 * We need to stop all tracing on all CPUS to read
10144 	 * the next buffer. This is a bit expensive, but is
10145 	 * not done often. We fill all what we can read,
10146 	 * and then release the locks again.
10147 	 */
10148 
10149 	while (!trace_empty(&iter)) {
10150 
10151 		if (!cnt)
10152 			printk(KERN_TRACE "---------------------------------\n");
10153 
10154 		cnt++;
10155 
10156 		trace_iterator_reset(&iter);
10157 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10158 
10159 		if (trace_find_next_entry_inc(&iter) != NULL) {
10160 			int ret;
10161 
10162 			ret = print_trace_line(&iter);
10163 			if (ret != TRACE_TYPE_NO_CONSUME)
10164 				trace_consume(&iter);
10165 		}
10166 		touch_nmi_watchdog();
10167 
10168 		trace_printk_seq(&iter.seq);
10169 	}
10170 
10171 	if (!cnt)
10172 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10173 	else
10174 		printk(KERN_TRACE "---------------------------------\n");
10175 
10176 	tr->trace_flags |= old_userobj;
10177 
10178 	for_each_tracing_cpu(cpu) {
10179 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10180 	}
10181 	local_irq_restore(flags);
10182 }
10183 
ftrace_dump_by_param(void)10184 static void ftrace_dump_by_param(void)
10185 {
10186 	bool first_param = true;
10187 	char dump_param[MAX_TRACER_SIZE];
10188 	char *buf, *token, *inst_name;
10189 	struct trace_array *tr;
10190 
10191 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10192 	buf = dump_param;
10193 
10194 	while ((token = strsep(&buf, ",")) != NULL) {
10195 		if (first_param) {
10196 			first_param = false;
10197 			if (!strcmp("0", token))
10198 				continue;
10199 			else if (!strcmp("1", token)) {
10200 				ftrace_dump_one(&global_trace, DUMP_ALL);
10201 				continue;
10202 			}
10203 			else if (!strcmp("2", token) ||
10204 			  !strcmp("orig_cpu", token)) {
10205 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10206 				continue;
10207 			}
10208 		}
10209 
10210 		inst_name = strsep(&token, "=");
10211 		tr = trace_array_find(inst_name);
10212 		if (!tr) {
10213 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10214 			continue;
10215 		}
10216 
10217 		if (token && (!strcmp("2", token) ||
10218 			  !strcmp("orig_cpu", token)))
10219 			ftrace_dump_one(tr, DUMP_ORIG);
10220 		else
10221 			ftrace_dump_one(tr, DUMP_ALL);
10222 	}
10223 }
10224 
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)10225 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10226 {
10227 	static atomic_t dump_running;
10228 
10229 	/* Only allow one dump user at a time. */
10230 	if (atomic_inc_return(&dump_running) != 1) {
10231 		atomic_dec(&dump_running);
10232 		return;
10233 	}
10234 
10235 	switch (oops_dump_mode) {
10236 	case DUMP_ALL:
10237 		ftrace_dump_one(&global_trace, DUMP_ALL);
10238 		break;
10239 	case DUMP_ORIG:
10240 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10241 		break;
10242 	case DUMP_PARAM:
10243 		ftrace_dump_by_param();
10244 		break;
10245 	case DUMP_NONE:
10246 		break;
10247 	default:
10248 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10249 		ftrace_dump_one(&global_trace, DUMP_ALL);
10250 	}
10251 
10252 	atomic_dec(&dump_running);
10253 }
10254 EXPORT_SYMBOL_GPL(ftrace_dump);
10255 
10256 #define WRITE_BUFSIZE  4096
10257 
trace_parse_run_command(struct file * file,const char __user * buffer,size_t count,loff_t * ppos,int (* createfn)(const char *))10258 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10259 				size_t count, loff_t *ppos,
10260 				int (*createfn)(const char *))
10261 {
10262 	char *kbuf, *buf, *tmp;
10263 	int ret = 0;
10264 	size_t done = 0;
10265 	size_t size;
10266 
10267 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10268 	if (!kbuf)
10269 		return -ENOMEM;
10270 
10271 	while (done < count) {
10272 		size = count - done;
10273 
10274 		if (size >= WRITE_BUFSIZE)
10275 			size = WRITE_BUFSIZE - 1;
10276 
10277 		if (copy_from_user(kbuf, buffer + done, size)) {
10278 			ret = -EFAULT;
10279 			goto out;
10280 		}
10281 		kbuf[size] = '\0';
10282 		buf = kbuf;
10283 		do {
10284 			tmp = strchr(buf, '\n');
10285 			if (tmp) {
10286 				*tmp = '\0';
10287 				size = tmp - buf + 1;
10288 			} else {
10289 				size = strlen(buf);
10290 				if (done + size < count) {
10291 					if (buf != kbuf)
10292 						break;
10293 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10294 					pr_warn("Line length is too long: Should be less than %d\n",
10295 						WRITE_BUFSIZE - 2);
10296 					ret = -EINVAL;
10297 					goto out;
10298 				}
10299 			}
10300 			done += size;
10301 
10302 			/* Remove comments */
10303 			tmp = strchr(buf, '#');
10304 
10305 			if (tmp)
10306 				*tmp = '\0';
10307 
10308 			ret = createfn(buf);
10309 			if (ret)
10310 				goto out;
10311 			buf += size;
10312 
10313 		} while (done < count);
10314 	}
10315 	ret = done;
10316 
10317 out:
10318 	kfree(kbuf);
10319 
10320 	return ret;
10321 }
10322 
10323 #ifdef CONFIG_TRACER_MAX_TRACE
tr_needs_alloc_snapshot(const char * name)10324 __init static bool tr_needs_alloc_snapshot(const char *name)
10325 {
10326 	char *test;
10327 	int len = strlen(name);
10328 	bool ret;
10329 
10330 	if (!boot_snapshot_index)
10331 		return false;
10332 
10333 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10334 	    boot_snapshot_info[len] == '\t')
10335 		return true;
10336 
10337 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10338 	if (!test)
10339 		return false;
10340 
10341 	sprintf(test, "\t%s\t", name);
10342 	ret = strstr(boot_snapshot_info, test) == NULL;
10343 	kfree(test);
10344 	return ret;
10345 }
10346 
do_allocate_snapshot(const char * name)10347 __init static void do_allocate_snapshot(const char *name)
10348 {
10349 	if (!tr_needs_alloc_snapshot(name))
10350 		return;
10351 
10352 	/*
10353 	 * When allocate_snapshot is set, the next call to
10354 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10355 	 * will allocate the snapshot buffer. That will alse clear
10356 	 * this flag.
10357 	 */
10358 	allocate_snapshot = true;
10359 }
10360 #else
do_allocate_snapshot(const char * name)10361 static inline void do_allocate_snapshot(const char *name) { }
10362 #endif
10363 
enable_instances(void)10364 __init static void enable_instances(void)
10365 {
10366 	struct trace_array *tr;
10367 	char *curr_str;
10368 	char *name;
10369 	char *str;
10370 	char *tok;
10371 
10372 	/* A tab is always appended */
10373 	boot_instance_info[boot_instance_index - 1] = '\0';
10374 	str = boot_instance_info;
10375 
10376 	while ((curr_str = strsep(&str, "\t"))) {
10377 		phys_addr_t start = 0;
10378 		phys_addr_t size = 0;
10379 		unsigned long addr = 0;
10380 		bool traceprintk = false;
10381 		bool traceoff = false;
10382 		char *flag_delim;
10383 		char *addr_delim;
10384 
10385 		tok = strsep(&curr_str, ",");
10386 
10387 		flag_delim = strchr(tok, '^');
10388 		addr_delim = strchr(tok, '@');
10389 
10390 		if (addr_delim)
10391 			*addr_delim++ = '\0';
10392 
10393 		if (flag_delim)
10394 			*flag_delim++ = '\0';
10395 
10396 		name = tok;
10397 
10398 		if (flag_delim) {
10399 			char *flag;
10400 
10401 			while ((flag = strsep(&flag_delim, "^"))) {
10402 				if (strcmp(flag, "traceoff") == 0) {
10403 					traceoff = true;
10404 				} else if ((strcmp(flag, "printk") == 0) ||
10405 					   (strcmp(flag, "traceprintk") == 0) ||
10406 					   (strcmp(flag, "trace_printk") == 0)) {
10407 					traceprintk = true;
10408 				} else {
10409 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10410 						flag, name);
10411 				}
10412 			}
10413 		}
10414 
10415 		tok = addr_delim;
10416 		if (tok && isdigit(*tok)) {
10417 			start = memparse(tok, &tok);
10418 			if (!start) {
10419 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10420 					name);
10421 				continue;
10422 			}
10423 			if (*tok != ':') {
10424 				pr_warn("Tracing: No size specified for instance %s\n", name);
10425 				continue;
10426 			}
10427 			tok++;
10428 			size = memparse(tok, &tok);
10429 			if (!size) {
10430 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10431 					name);
10432 				continue;
10433 			}
10434 		} else if (tok) {
10435 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10436 				start = 0;
10437 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10438 				continue;
10439 			}
10440 		}
10441 
10442 		if (start) {
10443 			addr = map_pages(start, size);
10444 			if (addr) {
10445 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10446 					name, &start, (unsigned long)size);
10447 			} else {
10448 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10449 				continue;
10450 			}
10451 		} else {
10452 			/* Only non mapped buffers have snapshot buffers */
10453 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10454 				do_allocate_snapshot(name);
10455 		}
10456 
10457 		tr = trace_array_create_systems(name, NULL, addr, size);
10458 		if (IS_ERR(tr)) {
10459 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10460 			continue;
10461 		}
10462 
10463 		if (traceoff)
10464 			tracer_tracing_off(tr);
10465 
10466 		if (traceprintk)
10467 			update_printk_trace(tr);
10468 
10469 		/*
10470 		 * If start is set, then this is a mapped buffer, and
10471 		 * cannot be deleted by user space, so keep the reference
10472 		 * to it.
10473 		 */
10474 		if (start) {
10475 			tr->flags |= TRACE_ARRAY_FL_BOOT;
10476 			tr->ref++;
10477 		}
10478 
10479 		while ((tok = strsep(&curr_str, ","))) {
10480 			early_enable_events(tr, tok, true);
10481 		}
10482 	}
10483 }
10484 
tracer_alloc_buffers(void)10485 __init static int tracer_alloc_buffers(void)
10486 {
10487 	int ring_buf_size;
10488 	int ret = -ENOMEM;
10489 
10490 
10491 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10492 		pr_warn("Tracing disabled due to lockdown\n");
10493 		return -EPERM;
10494 	}
10495 
10496 	/*
10497 	 * Make sure we don't accidentally add more trace options
10498 	 * than we have bits for.
10499 	 */
10500 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10501 
10502 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10503 		goto out;
10504 
10505 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10506 		goto out_free_buffer_mask;
10507 
10508 	/* Only allocate trace_printk buffers if a trace_printk exists */
10509 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10510 		/* Must be called before global_trace.buffer is allocated */
10511 		trace_printk_init_buffers();
10512 
10513 	/* To save memory, keep the ring buffer size to its minimum */
10514 	if (global_trace.ring_buffer_expanded)
10515 		ring_buf_size = trace_buf_size;
10516 	else
10517 		ring_buf_size = 1;
10518 
10519 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10520 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10521 
10522 	raw_spin_lock_init(&global_trace.start_lock);
10523 
10524 	/*
10525 	 * The prepare callbacks allocates some memory for the ring buffer. We
10526 	 * don't free the buffer if the CPU goes down. If we were to free
10527 	 * the buffer, then the user would lose any trace that was in the
10528 	 * buffer. The memory will be removed once the "instance" is removed.
10529 	 */
10530 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10531 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10532 				      NULL);
10533 	if (ret < 0)
10534 		goto out_free_cpumask;
10535 	/* Used for event triggers */
10536 	ret = -ENOMEM;
10537 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10538 	if (!temp_buffer)
10539 		goto out_rm_hp_state;
10540 
10541 	if (trace_create_savedcmd() < 0)
10542 		goto out_free_temp_buffer;
10543 
10544 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10545 		goto out_free_savedcmd;
10546 
10547 	/* TODO: make the number of buffers hot pluggable with CPUS */
10548 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10549 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10550 		goto out_free_pipe_cpumask;
10551 	}
10552 	if (global_trace.buffer_disabled)
10553 		tracing_off();
10554 
10555 	if (trace_boot_clock) {
10556 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10557 		if (ret < 0)
10558 			pr_warn("Trace clock %s not defined, going back to default\n",
10559 				trace_boot_clock);
10560 	}
10561 
10562 	/*
10563 	 * register_tracer() might reference current_trace, so it
10564 	 * needs to be set before we register anything. This is
10565 	 * just a bootstrap of current_trace anyway.
10566 	 */
10567 	global_trace.current_trace = &nop_trace;
10568 
10569 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10570 #ifdef CONFIG_TRACER_MAX_TRACE
10571 	spin_lock_init(&global_trace.snapshot_trigger_lock);
10572 #endif
10573 	ftrace_init_global_array_ops(&global_trace);
10574 
10575 #ifdef CONFIG_MODULES
10576 	INIT_LIST_HEAD(&global_trace.mod_events);
10577 #endif
10578 
10579 	init_trace_flags_index(&global_trace);
10580 
10581 	register_tracer(&nop_trace);
10582 
10583 	/* Function tracing may start here (via kernel command line) */
10584 	init_function_trace();
10585 
10586 	/* All seems OK, enable tracing */
10587 	tracing_disabled = 0;
10588 
10589 	atomic_notifier_chain_register(&panic_notifier_list,
10590 				       &trace_panic_notifier);
10591 
10592 	register_die_notifier(&trace_die_notifier);
10593 
10594 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10595 
10596 	INIT_LIST_HEAD(&global_trace.systems);
10597 	INIT_LIST_HEAD(&global_trace.events);
10598 	INIT_LIST_HEAD(&global_trace.hist_vars);
10599 	INIT_LIST_HEAD(&global_trace.err_log);
10600 	list_add(&global_trace.list, &ftrace_trace_arrays);
10601 
10602 	apply_trace_boot_options();
10603 
10604 	register_snapshot_cmd();
10605 
10606 	return 0;
10607 
10608 out_free_pipe_cpumask:
10609 	free_cpumask_var(global_trace.pipe_cpumask);
10610 out_free_savedcmd:
10611 	trace_free_saved_cmdlines_buffer();
10612 out_free_temp_buffer:
10613 	ring_buffer_free(temp_buffer);
10614 out_rm_hp_state:
10615 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10616 out_free_cpumask:
10617 	free_cpumask_var(global_trace.tracing_cpumask);
10618 out_free_buffer_mask:
10619 	free_cpumask_var(tracing_buffer_mask);
10620 out:
10621 	return ret;
10622 }
10623 
10624 #ifdef CONFIG_FUNCTION_TRACER
10625 /* Used to set module cached ftrace filtering at boot up */
trace_get_global_array(void)10626 __init struct trace_array *trace_get_global_array(void)
10627 {
10628 	return &global_trace;
10629 }
10630 #endif
10631 
ftrace_boot_snapshot(void)10632 void __init ftrace_boot_snapshot(void)
10633 {
10634 #ifdef CONFIG_TRACER_MAX_TRACE
10635 	struct trace_array *tr;
10636 
10637 	if (!snapshot_at_boot)
10638 		return;
10639 
10640 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10641 		if (!tr->allocated_snapshot)
10642 			continue;
10643 
10644 		tracing_snapshot_instance(tr);
10645 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10646 	}
10647 #endif
10648 }
10649 
early_trace_init(void)10650 void __init early_trace_init(void)
10651 {
10652 	if (tracepoint_printk) {
10653 		tracepoint_print_iter =
10654 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10655 		if (MEM_FAIL(!tracepoint_print_iter,
10656 			     "Failed to allocate trace iterator\n"))
10657 			tracepoint_printk = 0;
10658 		else
10659 			static_key_enable(&tracepoint_printk_key.key);
10660 	}
10661 	tracer_alloc_buffers();
10662 
10663 	init_events();
10664 }
10665 
trace_init(void)10666 void __init trace_init(void)
10667 {
10668 	trace_event_init();
10669 
10670 	if (boot_instance_index)
10671 		enable_instances();
10672 }
10673 
clear_boot_tracer(void)10674 __init static void clear_boot_tracer(void)
10675 {
10676 	/*
10677 	 * The default tracer at boot buffer is an init section.
10678 	 * This function is called in lateinit. If we did not
10679 	 * find the boot tracer, then clear it out, to prevent
10680 	 * later registration from accessing the buffer that is
10681 	 * about to be freed.
10682 	 */
10683 	if (!default_bootup_tracer)
10684 		return;
10685 
10686 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10687 	       default_bootup_tracer);
10688 	default_bootup_tracer = NULL;
10689 }
10690 
10691 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
tracing_set_default_clock(void)10692 __init static void tracing_set_default_clock(void)
10693 {
10694 	/* sched_clock_stable() is determined in late_initcall */
10695 	if (!trace_boot_clock && !sched_clock_stable()) {
10696 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10697 			pr_warn("Can not set tracing clock due to lockdown\n");
10698 			return;
10699 		}
10700 
10701 		printk(KERN_WARNING
10702 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10703 		       "If you want to keep using the local clock, then add:\n"
10704 		       "  \"trace_clock=local\"\n"
10705 		       "on the kernel command line\n");
10706 		tracing_set_clock(&global_trace, "global");
10707 	}
10708 }
10709 #else
tracing_set_default_clock(void)10710 static inline void tracing_set_default_clock(void) { }
10711 #endif
10712 
late_trace_init(void)10713 __init static int late_trace_init(void)
10714 {
10715 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10716 		static_key_disable(&tracepoint_printk_key.key);
10717 		tracepoint_printk = 0;
10718 	}
10719 
10720 	if (traceoff_after_boot)
10721 		tracing_off();
10722 
10723 	tracing_set_default_clock();
10724 	clear_boot_tracer();
10725 	return 0;
10726 }
10727 
10728 late_initcall_sync(late_trace_init);
10729