xref: /linux/kernel/trace/trace.c (revision 2c2b3d906c88a40b75f8d258119400a047e587de)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53 
54 #include "trace.h"
55 #include "trace_output.h"
56 
57 #ifdef CONFIG_FTRACE_STARTUP_TEST
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will look into the ring-buffer to count the
61  * entries inserted during the selftest, although concurrent
62  * insertions into the ring-buffer, such as trace_printk, could occur
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66 
67 /*
68  * If boot-time tracing including tracers/events via kernel cmdline
69  * is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72 
73 void __init disable_tracing_selftest(const char *reason)
74 {
75 	if (!tracing_selftest_disabled) {
76 		tracing_selftest_disabled = true;
77 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
78 	}
79 }
80 #else
81 #define tracing_selftest_running	0
82 #define tracing_selftest_disabled	0
83 #endif
84 
85 /* Pipe tracepoints to printk */
86 static struct trace_iterator *tracepoint_print_iter;
87 int tracepoint_printk;
88 static bool tracepoint_printk_stop_on_boot __initdata;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90 
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 	{ }
94 };
95 
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 	return 0;
100 }
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputting them to a
126  * serial console.
127  *
128  * It is off by default, but you can enable it either by specifying
129  * "ftrace_dump_on_oops" on the kernel command line, or by setting
130  * /proc/sys/kernel/ftrace_dump_on_oops.
131  * Set 1 if you want to dump buffers of all CPUs
132  * Set 2 if you want to dump the buffer of the CPU that triggered oops
133  * Set an instance name if you want to dump a specific trace instance.
134  * Multiple instance dump is also supported, and instances are separated
135  * by commas.
136  */
137 /* Set to string format zero to disable by default */
138 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
139 
140 /* When set, tracing will stop when a WARN*() is hit */
141 int __disable_trace_on_warning;
142 
143 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
144 /* Map of enums to their values, for "eval_map" file */
145 struct trace_eval_map_head {
146 	struct module			*mod;
147 	unsigned long			length;
148 };
149 
150 union trace_eval_map_item;
151 
152 struct trace_eval_map_tail {
153 	/*
154 	 * "end" is first and points to NULL as it must be different
155 	 * than "mod" or "eval_string"
156 	 */
157 	union trace_eval_map_item	*next;
158 	const char			*end;	/* points to NULL */
159 };
160 
161 static DEFINE_MUTEX(trace_eval_mutex);
162 
163 /*
164  * The trace_eval_maps are saved in an array with two extra elements,
165  * one at the beginning, and one at the end. The beginning item contains
166  * the count of the saved maps (head.length), and the module they
167  * belong to if not built in (head.mod). The ending item contains a
168  * pointer to the next array of saved eval_map items.
169  */
170 union trace_eval_map_item {
171 	struct trace_eval_map		map;
172 	struct trace_eval_map_head	head;
173 	struct trace_eval_map_tail	tail;
174 };
175 
176 static union trace_eval_map_item *trace_eval_maps;
177 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
178 
179 int tracing_set_tracer(struct trace_array *tr, const char *buf);
180 static void ftrace_trace_userstack(struct trace_array *tr,
181 				   struct trace_buffer *buffer,
182 				   unsigned int trace_ctx);
183 
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189 
190 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
191 static int boot_instance_index;
192 
193 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
194 static int boot_snapshot_index;
195 
196 static int __init set_cmdline_ftrace(char *str)
197 {
198 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
199 	default_bootup_tracer = bootup_tracer_buf;
200 	/* We are using ftrace early, expand it */
201 	trace_set_ring_buffer_expanded(NULL);
202 	return 1;
203 }
204 __setup("ftrace=", set_cmdline_ftrace);
205 
206 int ftrace_dump_on_oops_enabled(void)
207 {
208 	if (!strcmp("0", ftrace_dump_on_oops))
209 		return 0;
210 	else
211 		return 1;
212 }
213 
214 static int __init set_ftrace_dump_on_oops(char *str)
215 {
216 	if (!*str) {
217 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
218 		return 1;
219 	}
220 
221 	if (*str == ',') {
222 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
223 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
224 		return 1;
225 	}
226 
227 	if (*str++ == '=') {
228 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
229 		return 1;
230 	}
231 
232 	return 0;
233 }
234 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
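/*
 * Examples of the forms accepted by the parsing above (the instance names
 * "foo" and "bar" are hypothetical):
 *
 *	ftrace_dump_on_oops		- dump the buffers of all CPUs ("1")
 *	ftrace_dump_on_oops=2		- dump only the CPU that triggered the oops
 *	ftrace_dump_on_oops=foo,bar	- dump the "foo" and "bar" instances
 *	ftrace_dump_on_oops,foo		- dump all CPUs plus the "foo" instance
 */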
235 
236 static int __init stop_trace_on_warning(char *str)
237 {
238 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
239 		__disable_trace_on_warning = 1;
240 	return 1;
241 }
242 __setup("traceoff_on_warning", stop_trace_on_warning);
243 
244 static int __init boot_alloc_snapshot(char *str)
245 {
246 	char *slot = boot_snapshot_info + boot_snapshot_index;
247 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
248 	int ret;
249 
250 	if (str[0] == '=') {
251 		str++;
252 		if (strlen(str) >= left)
253 			return -1;
254 
255 		ret = snprintf(slot, left, "%s\t", str);
256 		boot_snapshot_index += ret;
257 	} else {
258 		allocate_snapshot = true;
259 		/* We also need the main ring buffer expanded */
260 		trace_set_ring_buffer_expanded(NULL);
261 	}
262 	return 1;
263 }
264 __setup("alloc_snapshot", boot_alloc_snapshot);
265 
266 
267 static int __init boot_snapshot(char *str)
268 {
269 	snapshot_at_boot = true;
270 	boot_alloc_snapshot(str);
271 	return 1;
272 }
273 __setup("ftrace_boot_snapshot", boot_snapshot);
274 
275 
276 static int __init boot_instance(char *str)
277 {
278 	char *slot = boot_instance_info + boot_instance_index;
279 	int left = sizeof(boot_instance_info) - boot_instance_index;
280 	int ret;
281 
282 	if (strlen(str) >= left)
283 		return -1;
284 
285 	ret = snprintf(slot, left, "%s\t", str);
286 	boot_instance_index += ret;
287 
288 	return 1;
289 }
290 __setup("trace_instance=", boot_instance);
291 
292 
293 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
294 
295 static int __init set_trace_boot_options(char *str)
296 {
297 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
298 	return 1;
299 }
300 __setup("trace_options=", set_trace_boot_options);
301 
302 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
303 static char *trace_boot_clock __initdata;
304 
305 static int __init set_trace_boot_clock(char *str)
306 {
307 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
308 	trace_boot_clock = trace_boot_clock_buf;
309 	return 1;
310 }
311 __setup("trace_clock=", set_trace_boot_clock);
312 
313 static int __init set_tracepoint_printk(char *str)
314 {
315 	/* Ignore the "tp_printk_stop_on_boot" param */
316 	if (*str == '_')
317 		return 0;
318 
319 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
320 		tracepoint_printk = 1;
321 	return 1;
322 }
323 __setup("tp_printk", set_tracepoint_printk);
324 
325 static int __init set_tracepoint_printk_stop(char *str)
326 {
327 	tracepoint_printk_stop_on_boot = true;
328 	return 1;
329 }
330 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
331 
332 unsigned long long ns2usecs(u64 nsec)
333 {
334 	nsec += 500;
335 	do_div(nsec, 1000);
336 	return nsec;
337 }
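/*
 * For example, ns2usecs(1499) == 1 and ns2usecs(1500) == 2: the +500 rounds
 * to the nearest microsecond before the divide.
 */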
338 
339 static void
340 trace_process_export(struct trace_export *export,
341 	       struct ring_buffer_event *event, int flag)
342 {
343 	struct trace_entry *entry;
344 	unsigned int size = 0;
345 
346 	if (export->flags & flag) {
347 		entry = ring_buffer_event_data(event);
348 		size = ring_buffer_event_length(event);
349 		export->write(export, entry, size);
350 	}
351 }
352 
353 static DEFINE_MUTEX(ftrace_export_lock);
354 
355 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
356 
357 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
358 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
359 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
360 
361 static inline void ftrace_exports_enable(struct trace_export *export)
362 {
363 	if (export->flags & TRACE_EXPORT_FUNCTION)
364 		static_branch_inc(&trace_function_exports_enabled);
365 
366 	if (export->flags & TRACE_EXPORT_EVENT)
367 		static_branch_inc(&trace_event_exports_enabled);
368 
369 	if (export->flags & TRACE_EXPORT_MARKER)
370 		static_branch_inc(&trace_marker_exports_enabled);
371 }
372 
373 static inline void ftrace_exports_disable(struct trace_export *export)
374 {
375 	if (export->flags & TRACE_EXPORT_FUNCTION)
376 		static_branch_dec(&trace_function_exports_enabled);
377 
378 	if (export->flags & TRACE_EXPORT_EVENT)
379 		static_branch_dec(&trace_event_exports_enabled);
380 
381 	if (export->flags & TRACE_EXPORT_MARKER)
382 		static_branch_dec(&trace_marker_exports_enabled);
383 }
384 
385 static void ftrace_exports(struct ring_buffer_event *event, int flag)
386 {
387 	struct trace_export *export;
388 
389 	preempt_disable_notrace();
390 
391 	export = rcu_dereference_raw_check(ftrace_exports_list);
392 	while (export) {
393 		trace_process_export(export, event, flag);
394 		export = rcu_dereference_raw_check(export->next);
395 	}
396 
397 	preempt_enable_notrace();
398 }
399 
400 static inline void
401 add_trace_export(struct trace_export **list, struct trace_export *export)
402 {
403 	rcu_assign_pointer(export->next, *list);
404 	/*
405 	 * We are inserting export into the list, but another
406 	 * CPU might be walking that list. We need to make sure
407 	 * the export->next pointer is valid before another CPU sees
408 	 * the export pointer included in the list.
409 	 */
410 	rcu_assign_pointer(*list, export);
411 }
412 
413 static inline int
414 rm_trace_export(struct trace_export **list, struct trace_export *export)
415 {
416 	struct trace_export **p;
417 
418 	for (p = list; *p != NULL; p = &(*p)->next)
419 		if (*p == export)
420 			break;
421 
422 	if (*p != export)
423 		return -1;
424 
425 	rcu_assign_pointer(*p, (*p)->next);
426 
427 	return 0;
428 }
429 
430 static inline void
431 add_ftrace_export(struct trace_export **list, struct trace_export *export)
432 {
433 	ftrace_exports_enable(export);
434 
435 	add_trace_export(list, export);
436 }
437 
438 static inline int
439 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
440 {
441 	int ret;
442 
443 	ret = rm_trace_export(list, export);
444 	ftrace_exports_disable(export);
445 
446 	return ret;
447 }
448 
449 int register_ftrace_export(struct trace_export *export)
450 {
451 	if (WARN_ON_ONCE(!export->write))
452 		return -1;
453 
454 	mutex_lock(&ftrace_export_lock);
455 
456 	add_ftrace_export(&ftrace_exports_list, export);
457 
458 	mutex_unlock(&ftrace_export_lock);
459 
460 	return 0;
461 }
462 EXPORT_SYMBOL_GPL(register_ftrace_export);
463 
464 int unregister_ftrace_export(struct trace_export *export)
465 {
466 	int ret;
467 
468 	mutex_lock(&ftrace_export_lock);
469 
470 	ret = rm_ftrace_export(&ftrace_exports_list, export);
471 
472 	mutex_unlock(&ftrace_export_lock);
473 
474 	return ret;
475 }
476 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
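/*
 * Illustrative sketch (not part of this file): a user of this interface
 * registers a trace_export whose ->write() callback receives each exported
 * entry; the callback signature follows the ->write() call made by
 * trace_process_export() above. The callback name and body are hypothetical;
 * only the register/unregister functions and the TRACE_EXPORT_* flags come
 * from the code above.
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		pr_debug("exporting %u bytes\n", size);
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_FUNCTION | TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */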
477 
478 /* trace_flags holds trace_options default values */
479 #define TRACE_DEFAULT_FLAGS						\
480 	(FUNCTION_DEFAULT_FLAGS |					\
481 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
482 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
483 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
484 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
485 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
486 
487 /* trace_options that are only supported by global_trace */
488 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
489 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
490 
491 /* trace_flags that are default zero for instances */
492 #define ZEROED_TRACE_FLAGS \
493 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
494 
495 /*
496  * The global_trace is the descriptor that holds the top-level tracing
497  * buffers for the live tracing.
498  */
499 static struct trace_array global_trace = {
500 	.trace_flags = TRACE_DEFAULT_FLAGS,
501 };
502 
503 static struct trace_array *printk_trace = &global_trace;
504 
505 static __always_inline bool printk_binsafe(struct trace_array *tr)
506 {
507 	/*
508 	 * The binary format of traceprintk can cause a crash if used
509 	 * by a buffer from another boot. Force the use of the
510 	 * non binary version of trace_printk if the trace_printk
511 	 * buffer is a boot mapped ring buffer.
512 	 */
513 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
514 }
515 
516 static void update_printk_trace(struct trace_array *tr)
517 {
518 	if (printk_trace == tr)
519 		return;
520 
521 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
522 	printk_trace = tr;
523 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
524 }
525 
526 void trace_set_ring_buffer_expanded(struct trace_array *tr)
527 {
528 	if (!tr)
529 		tr = &global_trace;
530 	tr->ring_buffer_expanded = true;
531 }
532 
533 LIST_HEAD(ftrace_trace_arrays);
534 
535 int trace_array_get(struct trace_array *this_tr)
536 {
537 	struct trace_array *tr;
538 	int ret = -ENODEV;
539 
540 	mutex_lock(&trace_types_lock);
541 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
542 		if (tr == this_tr) {
543 			tr->ref++;
544 			ret = 0;
545 			break;
546 		}
547 	}
548 	mutex_unlock(&trace_types_lock);
549 
550 	return ret;
551 }
552 
553 static void __trace_array_put(struct trace_array *this_tr)
554 {
555 	WARN_ON(!this_tr->ref);
556 	this_tr->ref--;
557 }
558 
559 /**
560  * trace_array_put - Decrement the reference counter for this trace array.
561  * @this_tr : pointer to the trace array
562  *
563  * NOTE: Use this when we no longer need the trace array returned by
564  * trace_array_get_by_name(). This ensures the trace array can be later
565  * destroyed.
566  *
567  */
568 void trace_array_put(struct trace_array *this_tr)
569 {
570 	if (!this_tr)
571 		return;
572 
573 	mutex_lock(&trace_types_lock);
574 	__trace_array_put(this_tr);
575 	mutex_unlock(&trace_types_lock);
576 }
577 EXPORT_SYMBOL_GPL(trace_array_put);
578 
579 int tracing_check_open_get_tr(struct trace_array *tr)
580 {
581 	int ret;
582 
583 	ret = security_locked_down(LOCKDOWN_TRACEFS);
584 	if (ret)
585 		return ret;
586 
587 	if (tracing_disabled)
588 		return -ENODEV;
589 
590 	if (tr && trace_array_get(tr) < 0)
591 		return -ENODEV;
592 
593 	return 0;
594 }
595 
596 /**
597  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
598  * @filtered_pids: The list of pids to check
599  * @search_pid: The PID to find in @filtered_pids
600  *
601  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
602  */
603 bool
604 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
605 {
606 	return trace_pid_list_is_set(filtered_pids, search_pid);
607 }
608 
609 /**
610  * trace_ignore_this_task - should a task be ignored for tracing
611  * @filtered_pids: The list of pids to check
612  * @filtered_no_pids: The list of pids not to be traced
613  * @task: The task that should be ignored if not filtered
614  *
615  * Checks if @task should be traced or not from @filtered_pids.
616  * Returns true if @task should *NOT* be traced.
617  * Returns false if @task should be traced.
618  */
619 bool
620 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
621 		       struct trace_pid_list *filtered_no_pids,
622 		       struct task_struct *task)
623 {
624 	/*
625 	 * If filtered_no_pids is not empty, and the task's pid is listed
626 	 * in filtered_no_pids, then return true.
627 	 * Otherwise, if filtered_pids is empty, that means we can
628 	 * trace all tasks. If it has content, then only trace pids
629 	 * within filtered_pids.
630 	 */
631 
632 	return (filtered_pids &&
633 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
634 		(filtered_no_pids &&
635 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
636 }
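/*
 * For example, with filtered_pids = { 10, 11 } and filtered_no_pids = { 12 }:
 * a task with pid 10 is traced (false is returned), pid 12 is ignored (it is
 * in the no_pids list), and pid 99 is ignored because it is not in
 * filtered_pids. With both lists NULL, every task is traced.
 */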
637 
638 /**
639  * trace_filter_add_remove_task - Add or remove a task from a pid_list
640  * @pid_list: The list to modify
641  * @self: The current task for fork or NULL for exit
642  * @task: The task to add or remove
643  *
644  * If adding a task, if @self is defined, the task is only added if @self
645  * is also included in @pid_list. This happens on fork and tasks should
646  * only be added when the parent is listed. If @self is NULL, then the
647  * @task pid will be removed from the list, which would happen on exit
648  * of a task.
649  */
650 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
651 				  struct task_struct *self,
652 				  struct task_struct *task)
653 {
654 	if (!pid_list)
655 		return;
656 
657 	/* For forks, we only add if the forking task is listed */
658 	if (self) {
659 		if (!trace_find_filtered_pid(pid_list, self->pid))
660 			return;
661 	}
662 
663 	/* "self" is set for forks, and NULL for exits */
664 	if (self)
665 		trace_pid_list_set(pid_list, task->pid);
666 	else
667 		trace_pid_list_clear(pid_list, task->pid);
668 }
669 
670 /**
671  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
672  * @pid_list: The pid list to show
673  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
674  * @pos: The position of the file
675  *
676  * This is used by the seq_file "next" operation to iterate the pids
677  * listed in a trace_pid_list structure.
678  *
679  * Returns the pid+1 as we want to display pid of zero, but NULL would
680  * stop the iteration.
681  */
682 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
683 {
684 	long pid = (unsigned long)v;
685 	unsigned int next;
686 
687 	(*pos)++;
688 
689 	/* pid already is +1 of the actual previous bit */
690 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
691 		return NULL;
692 
693 	pid = next;
694 
695 	/* Return pid + 1 to allow zero to be represented */
696 	return (void *)(pid + 1);
697 }
698 
699 /**
700  * trace_pid_start - Used for seq_file to start reading pid lists
701  * @pid_list: The pid list to show
702  * @pos: The position of the file
703  *
704  * This is used by seq_file "start" operation to start the iteration
705  * of listing pids.
706  *
707  * Returns the pid+1 as we want to display pid of zero, but NULL would
708  * stop the iteration.
709  */
710 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
711 {
712 	unsigned long pid;
713 	unsigned int first;
714 	loff_t l = 0;
715 
716 	if (trace_pid_list_first(pid_list, &first) < 0)
717 		return NULL;
718 
719 	pid = first;
720 
721 	/* Return pid + 1 so that zero can be the exit value */
722 	for (pid++; pid && l < *pos;
723 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
724 		;
725 	return (void *)pid;
726 }
727 
728 /**
729  * trace_pid_show - show the current pid in seq_file processing
730  * @m: The seq_file structure to write into
731  * @v: A void pointer of the pid (+1) value to display
732  *
733  * Can be directly used by seq_file operations to display the current
734  * pid value.
735  */
736 int trace_pid_show(struct seq_file *m, void *v)
737 {
738 	unsigned long pid = (unsigned long)v - 1;
739 
740 	seq_printf(m, "%lu\n", pid);
741 	return 0;
742 }
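/*
 * Illustrative sketch (the wrapper names are hypothetical): the three helpers
 * above are meant to back a seq_file iterator over a trace_pid_list, with the
 * list stashed in m->private by the open routine:
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations pid_list_seq_ops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 *
 * Real users in the tracing code also take locking in .start and release it
 * in .stop; that is omitted here.
 */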
743 
744 /* 128 should be much more than enough */
745 #define PID_BUF_SIZE		127
746 
747 int trace_pid_write(struct trace_pid_list *filtered_pids,
748 		    struct trace_pid_list **new_pid_list,
749 		    const char __user *ubuf, size_t cnt)
750 {
751 	struct trace_pid_list *pid_list;
752 	struct trace_parser parser;
753 	unsigned long val;
754 	int nr_pids = 0;
755 	ssize_t read = 0;
756 	ssize_t ret;
757 	loff_t pos;
758 	pid_t pid;
759 
760 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
761 		return -ENOMEM;
762 
763 	/*
764 	 * Always recreate a new array. The write is an all-or-nothing
765 	 * operation. Always create a new array when the user adds new
766 	 * pids. If the operation fails, then the current list is
767 	 * not modified.
768 	 */
769 	pid_list = trace_pid_list_alloc();
770 	if (!pid_list) {
771 		trace_parser_put(&parser);
772 		return -ENOMEM;
773 	}
774 
775 	if (filtered_pids) {
776 		/* copy the current bits to the new max */
777 		ret = trace_pid_list_first(filtered_pids, &pid);
778 		while (!ret) {
779 			trace_pid_list_set(pid_list, pid);
780 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
781 			nr_pids++;
782 		}
783 	}
784 
785 	ret = 0;
786 	while (cnt > 0) {
787 
788 		pos = 0;
789 
790 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
791 		if (ret < 0)
792 			break;
793 
794 		read += ret;
795 		ubuf += ret;
796 		cnt -= ret;
797 
798 		if (!trace_parser_loaded(&parser))
799 			break;
800 
801 		ret = -EINVAL;
802 		if (kstrtoul(parser.buffer, 0, &val))
803 			break;
804 
805 		pid = (pid_t)val;
806 
807 		if (trace_pid_list_set(pid_list, pid) < 0) {
808 			ret = -1;
809 			break;
810 		}
811 		nr_pids++;
812 
813 		trace_parser_clear(&parser);
814 		ret = 0;
815 	}
816 	trace_parser_put(&parser);
817 
818 	if (ret < 0) {
819 		trace_pid_list_free(pid_list);
820 		return ret;
821 	}
822 
823 	if (!nr_pids) {
824 		/* Cleared the list of pids */
825 		trace_pid_list_free(pid_list);
826 		pid_list = NULL;
827 	}
828 
829 	*new_pid_list = pid_list;
830 
831 	return read;
832 }
833 
834 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
835 {
836 	u64 ts;
837 
838 	/* Early boot up does not have a buffer yet */
839 	if (!buf->buffer)
840 		return trace_clock_local();
841 
842 	ts = ring_buffer_time_stamp(buf->buffer);
843 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
844 
845 	return ts;
846 }
847 
848 u64 ftrace_now(int cpu)
849 {
850 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
851 }
852 
853 /**
854  * tracing_is_enabled - Show if global_trace has been enabled
855  *
856  * Shows if the global trace has been enabled or not. It uses the
857  * mirror flag "buffer_disabled" to be used in fast paths such as for
858  * the irqsoff tracer. But it may be inaccurate due to races. If you
859  * need to know the accurate state, use tracing_is_on() which is a little
860  * slower, but accurate.
861  */
862 int tracing_is_enabled(void)
863 {
864 	/*
865 	 * For quick access (irqsoff uses this in fast path), just
866 	 * return the mirror variable of the state of the ring buffer.
867 	 * It's a little racy, but we don't really care.
868 	 */
869 	smp_rmb();
870 	return !global_trace.buffer_disabled;
871 }
872 
873 /*
874  * trace_buf_size is the size in bytes that is allocated
875  * for a buffer. Note, the number of bytes is always rounded
876  * to page size.
877  *
878  * This number is purposely set to a low number of 16384.
879  * If the dump on oops happens, it will be much appreciated
880  * to not have to wait for all that output. Anyway, this is
881  * configurable at both boot time and run time.
882  */
883 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
884 
885 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
886 
887 /* trace_types holds a link list of available tracers. */
888 static struct tracer		*trace_types __read_mostly;
889 
890 /*
891  * trace_types_lock is used to protect the trace_types list.
892  */
893 DEFINE_MUTEX(trace_types_lock);
894 
895 /*
896  * serialize the access of the ring buffer
897  *
898  * The ring buffer serializes readers, but that is only low level protection.
899  * The validity of events (as returned by ring_buffer_peek(), etc.)
900  * is not protected by the ring buffer.
901  *
902  * The content of events may become garbage if we allow other processes to
903  * consume these events concurrently:
904  *   A) the page of the consumed events may become a normal page
905  *      (not a reader page) in the ring buffer, and this page will be
906  *      rewritten by the event producer.
907  *   B) the page of the consumed events may become a page for splice_read,
908  *      and this page will be returned to the system.
909  *
910  * These primitives allow multiple processes to access different per-CPU
911  * ring buffers concurrently.
912  *
913  * These primitives don't distinguish read-only and read-consume access.
914  * Multiple read-only accesses are also serialized.
915  */
916 
917 #ifdef CONFIG_SMP
918 static DECLARE_RWSEM(all_cpu_access_lock);
919 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
920 
921 static inline void trace_access_lock(int cpu)
922 {
923 	if (cpu == RING_BUFFER_ALL_CPUS) {
924 		/* gain it for accessing the whole ring buffer. */
925 		down_write(&all_cpu_access_lock);
926 	} else {
927 		/* gain it for accessing a cpu ring buffer. */
928 
929 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
930 		down_read(&all_cpu_access_lock);
931 
932 		/* Secondly block other access to this @cpu ring buffer. */
933 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
934 	}
935 }
936 
937 static inline void trace_access_unlock(int cpu)
938 {
939 	if (cpu == RING_BUFFER_ALL_CPUS) {
940 		up_write(&all_cpu_access_lock);
941 	} else {
942 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
943 		up_read(&all_cpu_access_lock);
944 	}
945 }
946 
947 static inline void trace_access_lock_init(void)
948 {
949 	int cpu;
950 
951 	for_each_possible_cpu(cpu)
952 		mutex_init(&per_cpu(cpu_access_lock, cpu));
953 }
954 
955 #else
956 
957 static DEFINE_MUTEX(access_lock);
958 
959 static inline void trace_access_lock(int cpu)
960 {
961 	(void)cpu;
962 	mutex_lock(&access_lock);
963 }
964 
965 static inline void trace_access_unlock(int cpu)
966 {
967 	(void)cpu;
968 	mutex_unlock(&access_lock);
969 }
970 
971 static inline void trace_access_lock_init(void)
972 {
973 }
974 
975 #endif
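/*
 * Readers of the trace files bracket their buffer accesses with these
 * helpers, roughly as below (sketch only), where the cpu argument is either
 * a specific CPU or RING_BUFFER_ALL_CPUS:
 *
 *	trace_access_lock(iter->cpu_file);
 *	... peek at or consume events for that cpu ...
 *	trace_access_unlock(iter->cpu_file);
 */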
976 
977 #ifdef CONFIG_STACKTRACE
978 static void __ftrace_trace_stack(struct trace_array *tr,
979 				 struct trace_buffer *buffer,
980 				 unsigned int trace_ctx,
981 				 int skip, struct pt_regs *regs);
982 static inline void ftrace_trace_stack(struct trace_array *tr,
983 				      struct trace_buffer *buffer,
984 				      unsigned int trace_ctx,
985 				      int skip, struct pt_regs *regs);
986 
987 #else
988 static inline void __ftrace_trace_stack(struct trace_array *tr,
989 					struct trace_buffer *buffer,
990 					unsigned int trace_ctx,
991 					int skip, struct pt_regs *regs)
992 {
993 }
994 static inline void ftrace_trace_stack(struct trace_array *tr,
995 				      struct trace_buffer *buffer,
996 				      unsigned long trace_ctx,
997 				      int skip, struct pt_regs *regs)
998 {
999 }
1000 
1001 #endif
1002 
1003 static __always_inline void
1004 trace_event_setup(struct ring_buffer_event *event,
1005 		  int type, unsigned int trace_ctx)
1006 {
1007 	struct trace_entry *ent = ring_buffer_event_data(event);
1008 
1009 	tracing_generic_entry_update(ent, type, trace_ctx);
1010 }
1011 
1012 static __always_inline struct ring_buffer_event *
1013 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1014 			  int type,
1015 			  unsigned long len,
1016 			  unsigned int trace_ctx)
1017 {
1018 	struct ring_buffer_event *event;
1019 
1020 	event = ring_buffer_lock_reserve(buffer, len);
1021 	if (event != NULL)
1022 		trace_event_setup(event, type, trace_ctx);
1023 
1024 	return event;
1025 }
1026 
1027 void tracer_tracing_on(struct trace_array *tr)
1028 {
1029 	if (tr->array_buffer.buffer)
1030 		ring_buffer_record_on(tr->array_buffer.buffer);
1031 	/*
1032 	 * This flag is looked at when buffers haven't been allocated
1033 	 * yet, or by some tracers (like irqsoff) that just want to
1034 	 * know if the ring buffer has been disabled, but it can handle
1035 	 * races where it gets disabled but we still do a record.
1036 	 * As the check is in the fast path of the tracers, it is more
1037 	 * important to be fast than accurate.
1038 	 */
1039 	tr->buffer_disabled = 0;
1040 	/* Make the flag seen by readers */
1041 	smp_wmb();
1042 }
1043 
1044 /**
1045  * tracing_on - enable tracing buffers
1046  *
1047  * This function enables tracing buffers that may have been
1048  * disabled with tracing_off.
1049  */
1050 void tracing_on(void)
1051 {
1052 	tracer_tracing_on(&global_trace);
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_on);
1055 
1056 
1057 static __always_inline void
1058 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1059 {
1060 	__this_cpu_write(trace_taskinfo_save, true);
1061 
1062 	/* If this is the temp buffer, we need to commit fully */
1063 	if (this_cpu_read(trace_buffered_event) == event) {
1064 		/* Length is in event->array[0] */
1065 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1066 		/* Release the temp buffer */
1067 		this_cpu_dec(trace_buffered_event_cnt);
1068 		/* ring_buffer_unlock_commit() enables preemption */
1069 		preempt_enable_notrace();
1070 	} else
1071 		ring_buffer_unlock_commit(buffer);
1072 }
1073 
1074 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1075 		       const char *str, int size)
1076 {
1077 	struct ring_buffer_event *event;
1078 	struct trace_buffer *buffer;
1079 	struct print_entry *entry;
1080 	unsigned int trace_ctx;
1081 	int alloc;
1082 
1083 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1084 		return 0;
1085 
1086 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1087 		return 0;
1088 
1089 	if (unlikely(tracing_disabled))
1090 		return 0;
1091 
1092 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1093 
1094 	trace_ctx = tracing_gen_ctx();
1095 	buffer = tr->array_buffer.buffer;
1096 	ring_buffer_nest_start(buffer);
1097 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1098 					    trace_ctx);
1099 	if (!event) {
1100 		size = 0;
1101 		goto out;
1102 	}
1103 
1104 	entry = ring_buffer_event_data(event);
1105 	entry->ip = ip;
1106 
1107 	memcpy(&entry->buf, str, size);
1108 
1109 	/* Add a newline if necessary */
1110 	if (entry->buf[size - 1] != '\n') {
1111 		entry->buf[size] = '\n';
1112 		entry->buf[size + 1] = '\0';
1113 	} else
1114 		entry->buf[size] = '\0';
1115 
1116 	__buffer_unlock_commit(buffer, event);
1117 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1118  out:
1119 	ring_buffer_nest_end(buffer);
1120 	return size;
1121 }
1122 EXPORT_SYMBOL_GPL(__trace_array_puts);
1123 
1124 /**
1125  * __trace_puts - write a constant string into the trace buffer.
1126  * @ip:	   The address of the caller
1127  * @str:   The constant string to write
1128  * @size:  The size of the string.
1129  */
1130 int __trace_puts(unsigned long ip, const char *str, int size)
1131 {
1132 	return __trace_array_puts(printk_trace, ip, str, size);
1133 }
1134 EXPORT_SYMBOL_GPL(__trace_puts);
1135 
1136 /**
1137  * __trace_bputs - write the pointer to a constant string into trace buffer
1138  * @ip:	   The address of the caller
1139  * @str:   The constant string to write to the buffer to
1140  */
1141 int __trace_bputs(unsigned long ip, const char *str)
1142 {
1143 	struct trace_array *tr = READ_ONCE(printk_trace);
1144 	struct ring_buffer_event *event;
1145 	struct trace_buffer *buffer;
1146 	struct bputs_entry *entry;
1147 	unsigned int trace_ctx;
1148 	int size = sizeof(struct bputs_entry);
1149 	int ret = 0;
1150 
1151 	if (!printk_binsafe(tr))
1152 		return __trace_puts(ip, str, strlen(str));
1153 
1154 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1155 		return 0;
1156 
1157 	if (unlikely(tracing_selftest_running || tracing_disabled))
1158 		return 0;
1159 
1160 	trace_ctx = tracing_gen_ctx();
1161 	buffer = tr->array_buffer.buffer;
1162 
1163 	ring_buffer_nest_start(buffer);
1164 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1165 					    trace_ctx);
1166 	if (!event)
1167 		goto out;
1168 
1169 	entry = ring_buffer_event_data(event);
1170 	entry->ip			= ip;
1171 	entry->str			= str;
1172 
1173 	__buffer_unlock_commit(buffer, event);
1174 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1175 
1176 	ret = 1;
1177  out:
1178 	ring_buffer_nest_end(buffer);
1179 	return ret;
1180 }
1181 EXPORT_SYMBOL_GPL(__trace_bputs);
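/*
 * Callers normally do not use __trace_puts()/__trace_bputs() directly but go
 * through the trace_puts() macro (defined elsewhere in the kernel headers),
 * which is expected to pick the bputs variant for string literals (recording
 * only a pointer) and fall back to __trace_puts() otherwise, e.g.:
 *
 *	trace_puts("reached the slow path\n");
 */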
1182 
1183 #ifdef CONFIG_TRACER_SNAPSHOT
1184 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1185 					   void *cond_data)
1186 {
1187 	struct tracer *tracer = tr->current_trace;
1188 	unsigned long flags;
1189 
1190 	if (in_nmi()) {
1191 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1192 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1193 		return;
1194 	}
1195 
1196 	if (!tr->allocated_snapshot) {
1197 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1198 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1199 		tracer_tracing_off(tr);
1200 		return;
1201 	}
1202 
1203 	/* Note, snapshot can not be used when the tracer uses it */
1204 	if (tracer->use_max_tr) {
1205 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1206 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1207 		return;
1208 	}
1209 
1210 	if (tr->mapped) {
1211 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1212 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1213 		return;
1214 	}
1215 
1216 	local_irq_save(flags);
1217 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1218 	local_irq_restore(flags);
1219 }
1220 
1221 void tracing_snapshot_instance(struct trace_array *tr)
1222 {
1223 	tracing_snapshot_instance_cond(tr, NULL);
1224 }
1225 
1226 /**
1227  * tracing_snapshot - take a snapshot of the current buffer.
1228  *
1229  * This causes a swap between the snapshot buffer and the current live
1230  * tracing buffer. You can use this to take snapshots of the live
1231  * trace when some condition is triggered, but continue to trace.
1232  *
1233  * Note, make sure to allocate the snapshot with either
1234  * a tracing_snapshot_alloc(), or by doing it manually
1235  * with: echo 1 > /sys/kernel/tracing/snapshot
1236  *
1237  * If the snapshot buffer is not allocated, it will stop tracing.
1238  * Basically making a permanent snapshot.
1239  */
1240 void tracing_snapshot(void)
1241 {
1242 	struct trace_array *tr = &global_trace;
1243 
1244 	tracing_snapshot_instance(tr);
1245 }
1246 EXPORT_SYMBOL_GPL(tracing_snapshot);
1247 
1248 /**
1249  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1250  * @tr:		The tracing instance to snapshot
1251  * @cond_data:	The data to be tested conditionally, and possibly saved
1252  *
1253  * This is the same as tracing_snapshot() except that the snapshot is
1254  * conditional - the snapshot will only happen if the
1255  * cond_snapshot.update() implementation receiving the cond_data
1256  * returns true, which means that the trace array's cond_snapshot
1257  * update() operation used the cond_data to determine whether the
1258  * snapshot should be taken, and if it was, presumably saved it along
1259  * with the snapshot.
1260  */
1261 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1262 {
1263 	tracing_snapshot_instance_cond(tr, cond_data);
1264 }
1265 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1266 
1267 /**
1268  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1269  * @tr:		The tracing instance
1270  *
1271  * When the user enables a conditional snapshot using
1272  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1273  * with the snapshot.  This accessor is used to retrieve it.
1274  *
1275  * Should not be called from cond_snapshot.update(), since it takes
1276  * the tr->max_lock lock, which the code calling
1277  * cond_snapshot.update() has already done.
1278  *
1279  * Returns the cond_data associated with the trace array's snapshot.
1280  */
1281 void *tracing_cond_snapshot_data(struct trace_array *tr)
1282 {
1283 	void *cond_data = NULL;
1284 
1285 	local_irq_disable();
1286 	arch_spin_lock(&tr->max_lock);
1287 
1288 	if (tr->cond_snapshot)
1289 		cond_data = tr->cond_snapshot->cond_data;
1290 
1291 	arch_spin_unlock(&tr->max_lock);
1292 	local_irq_enable();
1293 
1294 	return cond_data;
1295 }
1296 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1297 
1298 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1299 					struct array_buffer *size_buf, int cpu_id);
1300 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1301 
1302 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1303 {
1304 	int order;
1305 	int ret;
1306 
1307 	if (!tr->allocated_snapshot) {
1308 
1309 		/* Make the snapshot buffer have the same order as main buffer */
1310 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1311 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1312 		if (ret < 0)
1313 			return ret;
1314 
1315 		/* allocate spare buffer */
1316 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1317 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1318 		if (ret < 0)
1319 			return ret;
1320 
1321 		tr->allocated_snapshot = true;
1322 	}
1323 
1324 	return 0;
1325 }
1326 
1327 static void free_snapshot(struct trace_array *tr)
1328 {
1329 	/*
1330 	 * We don't free the ring buffer; instead, we resize it because
1331 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1332 	 * we want to preserve it.
1333 	 */
1334 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1335 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1336 	set_buffer_entries(&tr->max_buffer, 1);
1337 	tracing_reset_online_cpus(&tr->max_buffer);
1338 	tr->allocated_snapshot = false;
1339 }
1340 
1341 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1342 {
1343 	int ret;
1344 
1345 	lockdep_assert_held(&trace_types_lock);
1346 
1347 	spin_lock(&tr->snapshot_trigger_lock);
1348 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1349 		spin_unlock(&tr->snapshot_trigger_lock);
1350 		return -EBUSY;
1351 	}
1352 
1353 	tr->snapshot++;
1354 	spin_unlock(&tr->snapshot_trigger_lock);
1355 
1356 	ret = tracing_alloc_snapshot_instance(tr);
1357 	if (ret) {
1358 		spin_lock(&tr->snapshot_trigger_lock);
1359 		tr->snapshot--;
1360 		spin_unlock(&tr->snapshot_trigger_lock);
1361 	}
1362 
1363 	return ret;
1364 }
1365 
1366 int tracing_arm_snapshot(struct trace_array *tr)
1367 {
1368 	int ret;
1369 
1370 	mutex_lock(&trace_types_lock);
1371 	ret = tracing_arm_snapshot_locked(tr);
1372 	mutex_unlock(&trace_types_lock);
1373 
1374 	return ret;
1375 }
1376 
1377 void tracing_disarm_snapshot(struct trace_array *tr)
1378 {
1379 	spin_lock(&tr->snapshot_trigger_lock);
1380 	if (!WARN_ON(!tr->snapshot))
1381 		tr->snapshot--;
1382 	spin_unlock(&tr->snapshot_trigger_lock);
1383 }
1384 
1385 /**
1386  * tracing_alloc_snapshot - allocate snapshot buffer.
1387  *
1388  * This only allocates the snapshot buffer if it isn't already
1389  * allocated - it doesn't also take a snapshot.
1390  *
1391  * This is meant to be used in cases where the snapshot buffer needs
1392  * to be set up for events that can't sleep but need to be able to
1393  * trigger a snapshot.
1394  */
1395 int tracing_alloc_snapshot(void)
1396 {
1397 	struct trace_array *tr = &global_trace;
1398 	int ret;
1399 
1400 	ret = tracing_alloc_snapshot_instance(tr);
1401 	WARN_ON(ret < 0);
1402 
1403 	return ret;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1406 
1407 /**
1408  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1409  *
1410  * This is similar to tracing_snapshot(), but it will allocate the
1411  * snapshot buffer if it isn't already allocated. Use this only
1412  * where it is safe to sleep, as the allocation may sleep.
1413  *
1414  * This causes a swap between the snapshot buffer and the current live
1415  * tracing buffer. You can use this to take snapshots of the live
1416  * trace when some condition is triggered, but continue to trace.
1417  */
1418 void tracing_snapshot_alloc(void)
1419 {
1420 	int ret;
1421 
1422 	ret = tracing_alloc_snapshot();
1423 	if (ret < 0)
1424 		return;
1425 
1426 	tracing_snapshot();
1427 }
1428 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
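/*
 * Illustrative sketch: a typical in-kernel pattern is to make sure the spare
 * buffer exists from sleepable context and then take snapshots from a hot
 * path when some condition fires ("latency" and "threshold" are hypothetical):
 *
 *	tracing_alloc_snapshot();	// may sleep; only allocates
 *	...
 *	if (latency > threshold)	// hot path: swap buffers, keep tracing
 *		tracing_snapshot();
 */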
1429 
1430 /**
1431  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1432  * @tr:		The tracing instance
1433  * @cond_data:	User data to associate with the snapshot
1434  * @update:	Implementation of the cond_snapshot update function
1435  *
1436  * Check whether the conditional snapshot for the given instance has
1437  * already been enabled, or if the current tracer is already using a
1438  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1439  * save the cond_data and update function inside.
1440  *
1441  * Returns 0 if successful, error otherwise.
1442  */
1443 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1444 				 cond_update_fn_t update)
1445 {
1446 	struct cond_snapshot *cond_snapshot;
1447 	int ret = 0;
1448 
1449 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1450 	if (!cond_snapshot)
1451 		return -ENOMEM;
1452 
1453 	cond_snapshot->cond_data = cond_data;
1454 	cond_snapshot->update = update;
1455 
1456 	mutex_lock(&trace_types_lock);
1457 
1458 	if (tr->current_trace->use_max_tr) {
1459 		ret = -EBUSY;
1460 		goto fail_unlock;
1461 	}
1462 
1463 	/*
1464 	 * The cond_snapshot can only change to NULL without the
1465 	 * trace_types_lock. We don't care if we race with it going
1466 	 * to NULL, but we want to make sure that it's not set to
1467 	 * something other than NULL when we get here, which we can
1468 	 * do safely with only holding the trace_types_lock and not
1469 	 * having to take the max_lock.
1470 	 */
1471 	if (tr->cond_snapshot) {
1472 		ret = -EBUSY;
1473 		goto fail_unlock;
1474 	}
1475 
1476 	ret = tracing_arm_snapshot_locked(tr);
1477 	if (ret)
1478 		goto fail_unlock;
1479 
1480 	local_irq_disable();
1481 	arch_spin_lock(&tr->max_lock);
1482 	tr->cond_snapshot = cond_snapshot;
1483 	arch_spin_unlock(&tr->max_lock);
1484 	local_irq_enable();
1485 
1486 	mutex_unlock(&trace_types_lock);
1487 
1488 	return ret;
1489 
1490  fail_unlock:
1491 	mutex_unlock(&trace_types_lock);
1492 	kfree(cond_snapshot);
1493 	return ret;
1494 }
1495 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1496 
1497 /**
1498  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1499  * @tr:		The tracing instance
1500  *
1501  * Check whether the conditional snapshot for the given instance is
1502  * enabled; if so, free the cond_snapshot associated with it,
1503  * otherwise return -EINVAL.
1504  *
1505  * Returns 0 if successful, error otherwise.
1506  */
1507 int tracing_snapshot_cond_disable(struct trace_array *tr)
1508 {
1509 	int ret = 0;
1510 
1511 	local_irq_disable();
1512 	arch_spin_lock(&tr->max_lock);
1513 
1514 	if (!tr->cond_snapshot)
1515 		ret = -EINVAL;
1516 	else {
1517 		kfree(tr->cond_snapshot);
1518 		tr->cond_snapshot = NULL;
1519 	}
1520 
1521 	arch_spin_unlock(&tr->max_lock);
1522 	local_irq_enable();
1523 
1524 	tracing_disarm_snapshot(tr);
1525 
1526 	return ret;
1527 }
1528 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1529 #else
1530 void tracing_snapshot(void)
1531 {
1532 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1533 }
1534 EXPORT_SYMBOL_GPL(tracing_snapshot);
1535 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1536 {
1537 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1538 }
1539 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1540 int tracing_alloc_snapshot(void)
1541 {
1542 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1543 	return -ENODEV;
1544 }
1545 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1546 void tracing_snapshot_alloc(void)
1547 {
1548 	/* Give warning */
1549 	tracing_snapshot();
1550 }
1551 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1552 void *tracing_cond_snapshot_data(struct trace_array *tr)
1553 {
1554 	return NULL;
1555 }
1556 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1557 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1558 {
1559 	return -ENODEV;
1560 }
1561 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1562 int tracing_snapshot_cond_disable(struct trace_array *tr)
1563 {
1564 	return false;
1565 }
1566 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1567 #define free_snapshot(tr)	do { } while (0)
1568 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1569 #endif /* CONFIG_TRACER_SNAPSHOT */
1570 
1571 void tracer_tracing_off(struct trace_array *tr)
1572 {
1573 	if (tr->array_buffer.buffer)
1574 		ring_buffer_record_off(tr->array_buffer.buffer);
1575 	/*
1576 	 * This flag is looked at when buffers haven't been allocated
1577 	 * yet, or by some tracers (like irqsoff) that just want to
1578 	 * know if the ring buffer has been disabled, but it can handle
1579 	 * races where it gets disabled but we still do a record.
1580 	 * As the check is in the fast path of the tracers, it is more
1581 	 * important to be fast than accurate.
1582 	 */
1583 	tr->buffer_disabled = 1;
1584 	/* Make the flag seen by readers */
1585 	smp_wmb();
1586 }
1587 
1588 /**
1589  * tracing_off - turn off tracing buffers
1590  *
1591  * This function stops the tracing buffers from recording data.
1592  * It does not disable any overhead the tracers themselves may
1593  * be causing. This function simply causes all recording to
1594  * the ring buffers to fail.
1595  */
1596 void tracing_off(void)
1597 {
1598 	tracer_tracing_off(&global_trace);
1599 }
1600 EXPORT_SYMBOL_GPL(tracing_off);
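/*
 * A common debugging pattern (illustrative only, "suspicious_condition" is
 * hypothetical) is to stop the ring buffer right after the event of interest
 * so it is not overwritten, then read /sys/kernel/tracing/trace afterwards:
 *
 *	if (suspicious_condition) {
 *		trace_printk("hit the bad path\n");
 *		tracing_off();
 *	}
 */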
1601 
1602 void disable_trace_on_warning(void)
1603 {
1604 	if (__disable_trace_on_warning) {
1605 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1606 			"Disabling tracing due to warning\n");
1607 		tracing_off();
1608 	}
1609 }
1610 
1611 /**
1612  * tracer_tracing_is_on - show real state of ring buffer enabled
1613  * @tr : the trace array to know if ring buffer is enabled
1614  *
1615  * Shows real state of the ring buffer if it is enabled or not.
1616  */
1617 bool tracer_tracing_is_on(struct trace_array *tr)
1618 {
1619 	if (tr->array_buffer.buffer)
1620 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1621 	return !tr->buffer_disabled;
1622 }
1623 
1624 /**
1625  * tracing_is_on - show state of ring buffers enabled
1626  */
1627 int tracing_is_on(void)
1628 {
1629 	return tracer_tracing_is_on(&global_trace);
1630 }
1631 EXPORT_SYMBOL_GPL(tracing_is_on);
1632 
1633 static int __init set_buf_size(char *str)
1634 {
1635 	unsigned long buf_size;
1636 
1637 	if (!str)
1638 		return 0;
1639 	buf_size = memparse(str, &str);
1640 	/*
1641 	 * nr_entries can not be zero and the startup
1642 	 * tests require some buffer space. Therefore
1643 	 * ensure we have at least 4096 bytes of buffer.
1644 	 */
1645 	trace_buf_size = max(4096UL, buf_size);
1646 	return 1;
1647 }
1648 __setup("trace_buf_size=", set_buf_size);
1649 
1650 static int __init set_tracing_thresh(char *str)
1651 {
1652 	unsigned long threshold;
1653 	int ret;
1654 
1655 	if (!str)
1656 		return 0;
1657 	ret = kstrtoul(str, 0, &threshold);
1658 	if (ret < 0)
1659 		return 0;
1660 	tracing_thresh = threshold * 1000;
1661 	return 1;
1662 }
1663 __setup("tracing_thresh=", set_tracing_thresh);
1664 
1665 unsigned long nsecs_to_usecs(unsigned long nsecs)
1666 {
1667 	return nsecs / 1000;
1668 }
1669 
1670 /*
1671  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1672  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1673  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1674  * of strings in the order that the evals (enum) were defined.
1675  */
1676 #undef C
1677 #define C(a, b) b
1678 
1679 /* These must match the bit positions in trace_iterator_flags */
1680 static const char *trace_options[] = {
1681 	TRACE_FLAGS
1682 	NULL
1683 };
1684 
1685 static struct {
1686 	u64 (*func)(void);
1687 	const char *name;
1688 	int in_ns;		/* is this clock in nanoseconds? */
1689 } trace_clocks[] = {
1690 	{ trace_clock_local,		"local",	1 },
1691 	{ trace_clock_global,		"global",	1 },
1692 	{ trace_clock_counter,		"counter",	0 },
1693 	{ trace_clock_jiffies,		"uptime",	0 },
1694 	{ trace_clock,			"perf",		1 },
1695 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1696 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1697 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1698 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1699 	ARCH_TRACE_CLOCKS
1700 };
1701 
1702 bool trace_clock_in_ns(struct trace_array *tr)
1703 {
1704 	if (trace_clocks[tr->clock_id].in_ns)
1705 		return true;
1706 
1707 	return false;
1708 }
1709 
1710 /*
1711  * trace_parser_get_init - gets the buffer for trace parser
1712  */
1713 int trace_parser_get_init(struct trace_parser *parser, int size)
1714 {
1715 	memset(parser, 0, sizeof(*parser));
1716 
1717 	parser->buffer = kmalloc(size, GFP_KERNEL);
1718 	if (!parser->buffer)
1719 		return 1;
1720 
1721 	parser->size = size;
1722 	return 0;
1723 }
1724 
1725 /*
1726  * trace_parser_put - frees the buffer for trace parser
1727  */
1728 void trace_parser_put(struct trace_parser *parser)
1729 {
1730 	kfree(parser->buffer);
1731 	parser->buffer = NULL;
1732 }
1733 
1734 /*
1735  * trace_get_user - reads the user input string separated by space
1736  * (matched by isspace(ch))
1737  *
1738  * For each string found the 'struct trace_parser' is updated,
1739  * and the function returns.
1740  *
1741  * Returns number of bytes read.
1742  *
1743  * See kernel/trace/trace.h for 'struct trace_parser' details.
1744  */
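/*
 * For example, a write of "func1 func2" returns after the first token,
 * with parser->buffer holding "func1" and the return value counting the
 * characters consumed (including the terminating space); "func2" is
 * picked up when the unconsumed remainder is written again. If a token
 * is cut off at the end of the write, parser->cont is set and the next
 * call appends to the same token instead of starting a new one.
 */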
1745 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1746 	size_t cnt, loff_t *ppos)
1747 {
1748 	char ch;
1749 	size_t read = 0;
1750 	ssize_t ret;
1751 
1752 	if (!*ppos)
1753 		trace_parser_clear(parser);
1754 
1755 	ret = get_user(ch, ubuf++);
1756 	if (ret)
1757 		goto out;
1758 
1759 	read++;
1760 	cnt--;
1761 
1762 	/*
1763 	 * If the parser did not finish with the last write,
1764 	 * continue reading the user input without skipping spaces.
1765 	 */
1766 	if (!parser->cont) {
1767 		/* skip white space */
1768 		while (cnt && isspace(ch)) {
1769 			ret = get_user(ch, ubuf++);
1770 			if (ret)
1771 				goto out;
1772 			read++;
1773 			cnt--;
1774 		}
1775 
1776 		parser->idx = 0;
1777 
1778 		/* only spaces were written */
1779 		if (isspace(ch) || !ch) {
1780 			*ppos += read;
1781 			ret = read;
1782 			goto out;
1783 		}
1784 	}
1785 
1786 	/* read the non-space input */
1787 	while (cnt && !isspace(ch) && ch) {
1788 		if (parser->idx < parser->size - 1)
1789 			parser->buffer[parser->idx++] = ch;
1790 		else {
1791 			ret = -EINVAL;
1792 			goto out;
1793 		}
1794 		ret = get_user(ch, ubuf++);
1795 		if (ret)
1796 			goto out;
1797 		read++;
1798 		cnt--;
1799 	}
1800 
1801 	/* We either got finished input or we have to wait for another call. */
1802 	if (isspace(ch) || !ch) {
1803 		parser->buffer[parser->idx] = 0;
1804 		parser->cont = false;
1805 	} else if (parser->idx < parser->size - 1) {
1806 		parser->cont = true;
1807 		parser->buffer[parser->idx++] = ch;
1808 		/* Make sure the parsed string always terminates with '\0'. */
1809 		parser->buffer[parser->idx] = 0;
1810 	} else {
1811 		ret = -EINVAL;
1812 		goto out;
1813 	}
1814 
1815 	*ppos += read;
1816 	ret = read;
1817 
1818 out:
1819 	return ret;
1820 }
1821 
1822 /* TODO add a seq_buf_to_buffer() */
1823 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1824 {
1825 	int len;
1826 
1827 	if (trace_seq_used(s) <= s->readpos)
1828 		return -EBUSY;
1829 
1830 	len = trace_seq_used(s) - s->readpos;
1831 	if (cnt > len)
1832 		cnt = len;
1833 	memcpy(buf, s->buffer + s->readpos, cnt);
1834 
1835 	s->readpos += cnt;
1836 	return cnt;
1837 }
1838 
1839 unsigned long __read_mostly	tracing_thresh;
1840 
1841 #ifdef CONFIG_TRACER_MAX_TRACE
1842 static const struct file_operations tracing_max_lat_fops;
1843 
1844 #ifdef LATENCY_FS_NOTIFY
1845 
1846 static struct workqueue_struct *fsnotify_wq;
1847 
1848 static void latency_fsnotify_workfn(struct work_struct *work)
1849 {
1850 	struct trace_array *tr = container_of(work, struct trace_array,
1851 					      fsnotify_work);
1852 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1853 }
1854 
1855 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1856 {
1857 	struct trace_array *tr = container_of(iwork, struct trace_array,
1858 					      fsnotify_irqwork);
1859 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1860 }
1861 
1862 static void trace_create_maxlat_file(struct trace_array *tr,
1863 				     struct dentry *d_tracer)
1864 {
1865 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1866 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1867 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1868 					      TRACE_MODE_WRITE,
1869 					      d_tracer, tr,
1870 					      &tracing_max_lat_fops);
1871 }
1872 
1873 __init static int latency_fsnotify_init(void)
1874 {
1875 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1876 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1877 	if (!fsnotify_wq) {
1878 		pr_err("Unable to allocate tr_max_lat_wq\n");
1879 		return -ENOMEM;
1880 	}
1881 	return 0;
1882 }
1883 
1884 late_initcall_sync(latency_fsnotify_init);
1885 
1886 void latency_fsnotify(struct trace_array *tr)
1887 {
1888 	if (!fsnotify_wq)
1889 		return;
1890 	/*
1891 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1892 	 * possible that we are called from __schedule() or do_idle(), which
1893 	 * could cause a deadlock.
1894 	 */
1895 	irq_work_queue(&tr->fsnotify_irqwork);
1896 }
1897 
1898 #else /* !LATENCY_FS_NOTIFY */
1899 
1900 #define trace_create_maxlat_file(tr, d_tracer)				\
1901 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1902 			  d_tracer, tr, &tracing_max_lat_fops)
1903 
1904 #endif
1905 
1906 /*
1907  * Copy the new maximum trace into the separate maximum-trace
1908  * structure. (this way the maximum trace is permanently saved,
1909  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1910  */
1911 static void
1912 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1913 {
1914 	struct array_buffer *trace_buf = &tr->array_buffer;
1915 	struct array_buffer *max_buf = &tr->max_buffer;
1916 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1917 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1918 
1919 	max_buf->cpu = cpu;
1920 	max_buf->time_start = data->preempt_timestamp;
1921 
1922 	max_data->saved_latency = tr->max_latency;
1923 	max_data->critical_start = data->critical_start;
1924 	max_data->critical_end = data->critical_end;
1925 
1926 	strscpy(max_data->comm, tsk->comm);
1927 	max_data->pid = tsk->pid;
1928 	/*
1929 	 * If tsk == current, then use current_uid(), as that does not use
1930 	 * RCU. The irq tracer can be called out of RCU scope.
1931 	 */
1932 	if (tsk == current)
1933 		max_data->uid = current_uid();
1934 	else
1935 		max_data->uid = task_uid(tsk);
1936 
1937 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1938 	max_data->policy = tsk->policy;
1939 	max_data->rt_priority = tsk->rt_priority;
1940 
1941 	/* record this task's comm */
1942 	tracing_record_cmdline(tsk);
1943 	latency_fsnotify(tr);
1944 }
1945 
1946 /**
1947  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1948  * @tr: the trace array to snapshot
1949  * @tsk: the task with the latency
1950  * @cpu: The cpu that initiated the trace.
1951  * @cond_data: User data associated with a conditional snapshot
1952  *
1953  * Flip the buffers between the @tr and the max_tr and record information
1954  * about which task was the cause of this latency.
1955  */
1956 void
1957 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1958 	      void *cond_data)
1959 {
1960 	if (tr->stop_count)
1961 		return;
1962 
1963 	WARN_ON_ONCE(!irqs_disabled());
1964 
1965 	if (!tr->allocated_snapshot) {
1966 		/* Only the nop tracer should hit this when disabling */
1967 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1968 		return;
1969 	}
1970 
1971 	arch_spin_lock(&tr->max_lock);
1972 
1973 	/* Inherit the recordable setting from array_buffer */
1974 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1975 		ring_buffer_record_on(tr->max_buffer.buffer);
1976 	else
1977 		ring_buffer_record_off(tr->max_buffer.buffer);
1978 
1979 #ifdef CONFIG_TRACER_SNAPSHOT
1980 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1981 		arch_spin_unlock(&tr->max_lock);
1982 		return;
1983 	}
1984 #endif
1985 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1986 
1987 	__update_max_tr(tr, tsk, cpu);
1988 
1989 	arch_spin_unlock(&tr->max_lock);
1990 
1991 	/* Any waiters on the old snapshot buffer need to wake up */
1992 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1993 }
1994 
1995 /**
1996  * update_max_tr_single - only copy one trace over, and reset the rest
1997  * @tr: tracer
1998  * @tsk: task with the latency
1999  * @cpu: the cpu of the buffer to copy.
2000  *
2001  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2002  */
2003 void
2004 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2005 {
2006 	int ret;
2007 
2008 	if (tr->stop_count)
2009 		return;
2010 
2011 	WARN_ON_ONCE(!irqs_disabled());
2012 	if (!tr->allocated_snapshot) {
2013 		/* Only the nop tracer should hit this when disabling */
2014 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2015 		return;
2016 	}
2017 
2018 	arch_spin_lock(&tr->max_lock);
2019 
2020 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2021 
2022 	if (ret == -EBUSY) {
2023 		/*
2024 		 * We failed to swap the buffer because a commit was
2025 		 * taking place on this CPU, or a resize was in progress.
2026 		 * We fail to record, but we reset the max trace buffer
2027 		 * (no one writes directly to it) and flag that it
2028 		 * failed.
2029 		 */
2030 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2031 			"Failed to swap buffers due to commit or resize in progress\n");
2032 	}
2033 
2034 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2035 
2036 	__update_max_tr(tr, tsk, cpu);
2037 	arch_spin_unlock(&tr->max_lock);
2038 }
2039 
2040 #endif /* CONFIG_TRACER_MAX_TRACE */
2041 
2042 struct pipe_wait {
2043 	struct trace_iterator		*iter;
2044 	int				wait_index;
2045 };
2046 
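/*
 * wait_pipe_cond() lets ring_buffer_wait() stop waiting early: it
 * returns true once another thread has bumped iter->wait_index (used to
 * kick pending readers) or once the iterator has been closed.
 */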
2047 static bool wait_pipe_cond(void *data)
2048 {
2049 	struct pipe_wait *pwait = data;
2050 	struct trace_iterator *iter = pwait->iter;
2051 
2052 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2053 		return true;
2054 
2055 	return iter->closed;
2056 }
2057 
2058 static int wait_on_pipe(struct trace_iterator *iter, int full)
2059 {
2060 	struct pipe_wait pwait;
2061 	int ret;
2062 
2063 	/* Iterators are static, they should be filled or empty */
2064 	if (trace_buffer_iter(iter, iter->cpu_file))
2065 		return 0;
2066 
2067 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2068 	pwait.iter = iter;
2069 
2070 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2071 			       wait_pipe_cond, &pwait);
2072 
2073 #ifdef CONFIG_TRACER_MAX_TRACE
2074 	/*
2075 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2076 	 * to happen, this would now be the main buffer.
2077 	 */
2078 	if (iter->snapshot)
2079 		iter->array_buffer = &iter->tr->max_buffer;
2080 #endif
2081 	return ret;
2082 }
2083 
2084 #ifdef CONFIG_FTRACE_STARTUP_TEST
2085 static bool selftests_can_run;
2086 
2087 struct trace_selftests {
2088 	struct list_head		list;
2089 	struct tracer			*type;
2090 };
2091 
2092 static LIST_HEAD(postponed_selftests);
2093 
2094 static int save_selftest(struct tracer *type)
2095 {
2096 	struct trace_selftests *selftest;
2097 
2098 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2099 	if (!selftest)
2100 		return -ENOMEM;
2101 
2102 	selftest->type = type;
2103 	list_add(&selftest->list, &postponed_selftests);
2104 	return 0;
2105 }
2106 
2107 static int run_tracer_selftest(struct tracer *type)
2108 {
2109 	struct trace_array *tr = &global_trace;
2110 	struct tracer *saved_tracer = tr->current_trace;
2111 	int ret;
2112 
2113 	if (!type->selftest || tracing_selftest_disabled)
2114 		return 0;
2115 
2116 	/*
2117 	 * If a tracer registers early in boot up (before scheduling is
2118 	 * initialized and such), then do not run its selftests yet.
2119 	 * Instead, run it a little later in the boot process.
2120 	 */
2121 	if (!selftests_can_run)
2122 		return save_selftest(type);
2123 
2124 	if (!tracing_is_on()) {
2125 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2126 			type->name);
2127 		return 0;
2128 	}
2129 
2130 	/*
2131 	 * Run a selftest on this tracer.
2132 	 * Here we reset the trace buffer, and set the current
2133 	 * tracer to be this tracer. The tracer can then run some
2134 	 * internal tracing to verify that everything is in order.
2135 	 * If we fail, we do not register this tracer.
2136 	 */
2137 	tracing_reset_online_cpus(&tr->array_buffer);
2138 
2139 	tr->current_trace = type;
2140 
2141 #ifdef CONFIG_TRACER_MAX_TRACE
2142 	if (type->use_max_tr) {
2143 		/* If we expanded the buffers, make sure the max is expanded too */
2144 		if (tr->ring_buffer_expanded)
2145 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2146 					   RING_BUFFER_ALL_CPUS);
2147 		tr->allocated_snapshot = true;
2148 	}
2149 #endif
2150 
2151 	/* the test is responsible for initializing and enabling */
2152 	pr_info("Testing tracer %s: ", type->name);
2153 	ret = type->selftest(type, tr);
2154 	/* the test is responsible for resetting too */
2155 	tr->current_trace = saved_tracer;
2156 	if (ret) {
2157 		printk(KERN_CONT "FAILED!\n");
2158 		/* Add the warning after printing 'FAILED' */
2159 		WARN_ON(1);
2160 		return -1;
2161 	}
2162 	/* Only reset on passing, to avoid touching corrupted buffers */
2163 	tracing_reset_online_cpus(&tr->array_buffer);
2164 
2165 #ifdef CONFIG_TRACER_MAX_TRACE
2166 	if (type->use_max_tr) {
2167 		tr->allocated_snapshot = false;
2168 
2169 		/* Shrink the max buffer again */
2170 		if (tr->ring_buffer_expanded)
2171 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2172 					   RING_BUFFER_ALL_CPUS);
2173 	}
2174 #endif
2175 
2176 	printk(KERN_CONT "PASSED\n");
2177 	return 0;
2178 }
2179 
2180 static int do_run_tracer_selftest(struct tracer *type)
2181 {
2182 	int ret;
2183 
2184 	/*
2185 	 * Tests can take a long time, especially if they are run one after the
2186 	 * other, as does happen during bootup when all the tracers are
2187 	 * registered. This could cause the soft lockup watchdog to trigger.
2188 	 */
2189 	cond_resched();
2190 
2191 	tracing_selftest_running = true;
2192 	ret = run_tracer_selftest(type);
2193 	tracing_selftest_running = false;
2194 
2195 	return ret;
2196 }
2197 
2198 static __init int init_trace_selftests(void)
2199 {
2200 	struct trace_selftests *p, *n;
2201 	struct tracer *t, **last;
2202 	int ret;
2203 
2204 	selftests_can_run = true;
2205 
2206 	mutex_lock(&trace_types_lock);
2207 
2208 	if (list_empty(&postponed_selftests))
2209 		goto out;
2210 
2211 	pr_info("Running postponed tracer tests:\n");
2212 
2213 	tracing_selftest_running = true;
2214 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2215 		/* This loop can take minutes when sanitizers are enabled, so
2216 		 * let's make sure we allow RCU processing.
2217 		 */
2218 		cond_resched();
2219 		ret = run_tracer_selftest(p->type);
2220 		/* If the test fails, then warn and remove from available_tracers */
2221 		if (ret < 0) {
2222 			WARN(1, "tracer: %s failed selftest, disabling\n",
2223 			     p->type->name);
2224 			last = &trace_types;
2225 			for (t = trace_types; t; t = t->next) {
2226 				if (t == p->type) {
2227 					*last = t->next;
2228 					break;
2229 				}
2230 				last = &t->next;
2231 			}
2232 		}
2233 		list_del(&p->list);
2234 		kfree(p);
2235 	}
2236 	tracing_selftest_running = false;
2237 
2238  out:
2239 	mutex_unlock(&trace_types_lock);
2240 
2241 	return 0;
2242 }
2243 core_initcall(init_trace_selftests);
2244 #else
2245 static inline int do_run_tracer_selftest(struct tracer *type)
2246 {
2247 	return 0;
2248 }
2249 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2250 
2251 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2252 
2253 static void __init apply_trace_boot_options(void);
2254 
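/*
 * A minimal registration looks roughly like this (illustrative only;
 * see struct tracer in trace.h for the full set of callbacks):
 *
 *	static struct tracer example_tracer __read_mostly = {
 *		.name	= "example",
 *		.init	= example_tracer_init,
 *		.reset	= example_tracer_reset,
 *	};
 *
 *	register_tracer(&example_tracer);
 */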
2255 /**
2256  * register_tracer - register a tracer with the ftrace system.
2257  * @type: the plugin for the tracer
2258  *
2259  * Register a new plugin tracer.
2260  */
2261 int __init register_tracer(struct tracer *type)
2262 {
2263 	struct tracer *t;
2264 	int ret = 0;
2265 
2266 	if (!type->name) {
2267 		pr_info("Tracer must have a name\n");
2268 		return -1;
2269 	}
2270 
2271 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2272 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2273 		return -1;
2274 	}
2275 
2276 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2277 		pr_warn("Can not register tracer %s due to lockdown\n",
2278 			   type->name);
2279 		return -EPERM;
2280 	}
2281 
2282 	mutex_lock(&trace_types_lock);
2283 
2284 	for (t = trace_types; t; t = t->next) {
2285 		if (strcmp(type->name, t->name) == 0) {
2286 			/* already found */
2287 			pr_info("Tracer %s already registered\n",
2288 				type->name);
2289 			ret = -1;
2290 			goto out;
2291 		}
2292 	}
2293 
2294 	if (!type->set_flag)
2295 		type->set_flag = &dummy_set_flag;
2296 	if (!type->flags) {
2297 		/* Allocate a dummy tracer_flags */
2298 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2299 		if (!type->flags) {
2300 			ret = -ENOMEM;
2301 			goto out;
2302 		}
2303 		type->flags->val = 0;
2304 		type->flags->opts = dummy_tracer_opt;
2305 	} else
2306 		if (!type->flags->opts)
2307 			type->flags->opts = dummy_tracer_opt;
2308 
2309 	/* store the tracer for __set_tracer_option */
2310 	type->flags->trace = type;
2311 
2312 	ret = do_run_tracer_selftest(type);
2313 	if (ret < 0)
2314 		goto out;
2315 
2316 	type->next = trace_types;
2317 	trace_types = type;
2318 	add_tracer_options(&global_trace, type);
2319 
2320  out:
2321 	mutex_unlock(&trace_types_lock);
2322 
2323 	if (ret || !default_bootup_tracer)
2324 		goto out_unlock;
2325 
2326 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2327 		goto out_unlock;
2328 
2329 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2330 	/* Do we want this tracer to start on bootup? */
2331 	tracing_set_tracer(&global_trace, type->name);
2332 	default_bootup_tracer = NULL;
2333 
2334 	apply_trace_boot_options();
2335 
2336 	/* Disable other selftests, since this will break them. */
2337 	disable_tracing_selftest("running a tracer");
2338 
2339  out_unlock:
2340 	return ret;
2341 }
2342 
2343 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2344 {
2345 	struct trace_buffer *buffer = buf->buffer;
2346 
2347 	if (!buffer)
2348 		return;
2349 
2350 	ring_buffer_record_disable(buffer);
2351 
2352 	/* Make sure all commits have finished */
2353 	synchronize_rcu();
2354 	ring_buffer_reset_cpu(buffer, cpu);
2355 
2356 	ring_buffer_record_enable(buffer);
2357 }
2358 
2359 void tracing_reset_online_cpus(struct array_buffer *buf)
2360 {
2361 	struct trace_buffer *buffer = buf->buffer;
2362 
2363 	if (!buffer)
2364 		return;
2365 
2366 	ring_buffer_record_disable(buffer);
2367 
2368 	/* Make sure all commits have finished */
2369 	synchronize_rcu();
2370 
2371 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2372 
2373 	ring_buffer_reset_online_cpus(buffer);
2374 
2375 	ring_buffer_record_enable(buffer);
2376 }
2377 
2378 static void tracing_reset_all_cpus(struct array_buffer *buf)
2379 {
2380 	struct trace_buffer *buffer = buf->buffer;
2381 
2382 	if (!buffer)
2383 		return;
2384 
2385 	ring_buffer_record_disable(buffer);
2386 
2387 	/* Make sure all commits have finished */
2388 	synchronize_rcu();
2389 
2390 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2391 
2392 	ring_buffer_reset(buffer);
2393 
2394 	ring_buffer_record_enable(buffer);
2395 }
2396 
2397 /* Must have trace_types_lock held */
2398 void tracing_reset_all_online_cpus_unlocked(void)
2399 {
2400 	struct trace_array *tr;
2401 
2402 	lockdep_assert_held(&trace_types_lock);
2403 
2404 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2405 		if (!tr->clear_trace)
2406 			continue;
2407 		tr->clear_trace = false;
2408 		tracing_reset_online_cpus(&tr->array_buffer);
2409 #ifdef CONFIG_TRACER_MAX_TRACE
2410 		tracing_reset_online_cpus(&tr->max_buffer);
2411 #endif
2412 	}
2413 }
2414 
2415 void tracing_reset_all_online_cpus(void)
2416 {
2417 	mutex_lock(&trace_types_lock);
2418 	tracing_reset_all_online_cpus_unlocked();
2419 	mutex_unlock(&trace_types_lock);
2420 }
2421 
2422 int is_tracing_stopped(void)
2423 {
2424 	return global_trace.stop_count;
2425 }
2426 
2427 static void tracing_start_tr(struct trace_array *tr)
2428 {
2429 	struct trace_buffer *buffer;
2430 	unsigned long flags;
2431 
2432 	if (tracing_disabled)
2433 		return;
2434 
2435 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2436 	if (--tr->stop_count) {
2437 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2438 			/* Someone screwed up their debugging */
2439 			tr->stop_count = 0;
2440 		}
2441 		goto out;
2442 	}
2443 
2444 	/* Prevent the buffers from switching */
2445 	arch_spin_lock(&tr->max_lock);
2446 
2447 	buffer = tr->array_buffer.buffer;
2448 	if (buffer)
2449 		ring_buffer_record_enable(buffer);
2450 
2451 #ifdef CONFIG_TRACER_MAX_TRACE
2452 	buffer = tr->max_buffer.buffer;
2453 	if (buffer)
2454 		ring_buffer_record_enable(buffer);
2455 #endif
2456 
2457 	arch_spin_unlock(&tr->max_lock);
2458 
2459  out:
2460 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2461 }
2462 
2463 /**
2464  * tracing_start - quick start of the tracer
2465  *
2466  * If tracing is enabled but was stopped by tracing_stop,
2467  * this will start the tracer back up.
2468  */
2469 void tracing_start(void)
2470 
2471 {
2472 	return tracing_start_tr(&global_trace);
2473 }
2474 
2475 static void tracing_stop_tr(struct trace_array *tr)
2476 {
2477 	struct trace_buffer *buffer;
2478 	unsigned long flags;
2479 
2480 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2481 	if (tr->stop_count++)
2482 		goto out;
2483 
2484 	/* Prevent the buffers from switching */
2485 	arch_spin_lock(&tr->max_lock);
2486 
2487 	buffer = tr->array_buffer.buffer;
2488 	if (buffer)
2489 		ring_buffer_record_disable(buffer);
2490 
2491 #ifdef CONFIG_TRACER_MAX_TRACE
2492 	buffer = tr->max_buffer.buffer;
2493 	if (buffer)
2494 		ring_buffer_record_disable(buffer);
2495 #endif
2496 
2497 	arch_spin_unlock(&tr->max_lock);
2498 
2499  out:
2500 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2501 }
2502 
2503 /**
2504  * tracing_stop - quick stop of the tracer
2505  *
2506  * Lightweight way to stop tracing. Use in conjunction with
2507  * tracing_start.
2508  */
2509 void tracing_stop(void)
2510 {
2511 	return tracing_stop_tr(&global_trace);
2512 }
2513 
2514 /*
2515  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2516  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2517  * simplifies those functions and keeps them in sync.
2518  */
2519 enum print_line_t trace_handle_return(struct trace_seq *s)
2520 {
2521 	return trace_seq_has_overflowed(s) ?
2522 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2523 }
2524 EXPORT_SYMBOL_GPL(trace_handle_return);
2525 
2526 static unsigned short migration_disable_value(void)
2527 {
2528 #if defined(CONFIG_SMP)
2529 	return current->migration_disabled;
2530 #else
2531 	return 0;
2532 #endif
2533 }
2534 
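/*
 * The value returned below packs the context into one word: bits 0-3
 * hold the preemption depth (capped at 15), bits 4-7 the migration-
 * disable depth (capped at 15), and bits 16 and up the TRACE_FLAG_*
 * bits computed from the current context.
 */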
2535 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2536 {
2537 	unsigned int trace_flags = irqs_status;
2538 	unsigned int pc;
2539 
2540 	pc = preempt_count();
2541 
2542 	if (pc & NMI_MASK)
2543 		trace_flags |= TRACE_FLAG_NMI;
2544 	if (pc & HARDIRQ_MASK)
2545 		trace_flags |= TRACE_FLAG_HARDIRQ;
2546 	if (in_serving_softirq())
2547 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2548 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2549 		trace_flags |= TRACE_FLAG_BH_OFF;
2550 
2551 	if (tif_need_resched())
2552 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2553 	if (test_preempt_need_resched())
2554 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2555 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2556 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2557 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2558 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2559 }
2560 
2561 struct ring_buffer_event *
2562 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2563 			  int type,
2564 			  unsigned long len,
2565 			  unsigned int trace_ctx)
2566 {
2567 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2568 }
2569 
2570 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2571 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2572 static int trace_buffered_event_ref;
2573 
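/*
 * Enable/disable calls are reference counted under event_mutex: the
 * per-CPU pages are allocated on the first enable and freed only when
 * the last user calls trace_buffered_event_disable().
 */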
2574 /**
2575  * trace_buffered_event_enable - enable buffering events
2576  *
2577  * When events are being filtered, it is quicker to use a temporary
2578  * buffer to write the event data into if there's a likely chance
2579  * that it will not be committed. The discard of the ring buffer
2580  * is not as fast as committing, and is much slower than copying
2581  * a commit.
2582  *
2583  * When an event is to be filtered, allocate per cpu buffers to
2584  * write the event data into, and if the event is filtered and discarded
2585  * it is simply dropped, otherwise, the entire data is to be committed
2586  * in one shot.
2587  */
2588 void trace_buffered_event_enable(void)
2589 {
2590 	struct ring_buffer_event *event;
2591 	struct page *page;
2592 	int cpu;
2593 
2594 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2595 
2596 	if (trace_buffered_event_ref++)
2597 		return;
2598 
2599 	for_each_tracing_cpu(cpu) {
2600 		page = alloc_pages_node(cpu_to_node(cpu),
2601 					GFP_KERNEL | __GFP_NORETRY, 0);
2602 		/* This is just an optimization and can handle failures */
2603 		if (!page) {
2604 			pr_err("Failed to allocate event buffer\n");
2605 			break;
2606 		}
2607 
2608 		event = page_address(page);
2609 		memset(event, 0, sizeof(*event));
2610 
2611 		per_cpu(trace_buffered_event, cpu) = event;
2612 
2613 		preempt_disable();
2614 		if (cpu == smp_processor_id() &&
2615 		    __this_cpu_read(trace_buffered_event) !=
2616 		    per_cpu(trace_buffered_event, cpu))
2617 			WARN_ON_ONCE(1);
2618 		preempt_enable();
2619 	}
2620 }
2621 
2622 static void enable_trace_buffered_event(void *data)
2623 {
2624 	/* Probably not needed, but do it anyway */
2625 	smp_rmb();
2626 	this_cpu_dec(trace_buffered_event_cnt);
2627 }
2628 
2629 static void disable_trace_buffered_event(void *data)
2630 {
2631 	this_cpu_inc(trace_buffered_event_cnt);
2632 }
2633 
2634 /**
2635  * trace_buffered_event_disable - disable buffering events
2636  *
2637  * When a filter is removed, it is faster to not use the buffered
2638  * events, and to commit directly into the ring buffer. Free up
2639  * the temp buffers when there are no more users. This requires
2640  * special synchronization with current events.
2641  */
2642 void trace_buffered_event_disable(void)
2643 {
2644 	int cpu;
2645 
2646 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2647 
2648 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2649 		return;
2650 
2651 	if (--trace_buffered_event_ref)
2652 		return;
2653 
2654 	/* For each CPU, set the buffer as used. */
2655 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2656 			 NULL, true);
2657 
2658 	/* Wait for all current users to finish */
2659 	synchronize_rcu();
2660 
2661 	for_each_tracing_cpu(cpu) {
2662 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2663 		per_cpu(trace_buffered_event, cpu) = NULL;
2664 	}
2665 
2666 	/*
2667 	 * Wait for all CPUs that potentially started checking if they can use
2668 	 * their event buffer only after the previous synchronize_rcu() call and
2669 	 * they still read a valid pointer from trace_buffered_event. It must be
2670 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2671 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2672 	 */
2673 	synchronize_rcu();
2674 
2675 	/* For each CPU, relinquish the buffer */
2676 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2677 			 true);
2678 }
2679 
2680 static struct trace_buffer *temp_buffer;
2681 
2682 struct ring_buffer_event *
2683 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2684 			  struct trace_event_file *trace_file,
2685 			  int type, unsigned long len,
2686 			  unsigned int trace_ctx)
2687 {
2688 	struct ring_buffer_event *entry;
2689 	struct trace_array *tr = trace_file->tr;
2690 	int val;
2691 
2692 	*current_rb = tr->array_buffer.buffer;
2693 
2694 	if (!tr->no_filter_buffering_ref &&
2695 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2696 		preempt_disable_notrace();
2697 		/*
2698 		 * Filtering is on, so try to use the per cpu buffer first.
2699 		 * This buffer will simulate a ring_buffer_event,
2700 		 * where the type_len is zero and the array[0] will
2701 		 * hold the full length.
2702 		 * (see include/linux/ring-buffer.h for details on
2703 		 *  how the ring_buffer_event is structured).
2704 		 *
2705 		 * Using a temp buffer during filtering and copying it
2706 		 * on a matched filter is quicker than writing directly
2707 		 * into the ring buffer and then discarding it when
2708 		 * it doesn't match. That is because the discard
2709 		 * requires several atomic operations to get right.
2710 		 * Copying on match and doing nothing on a failed match
2711 		 * is still quicker than no copy on match, but having
2712 		 * to discard out of the ring buffer on a failed match.
2713 		 */
2714 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2715 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2716 
2717 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2718 
2719 			/*
2720 			 * Preemption is disabled, but interrupts and NMIs
2721 			 * can still come in now. If that happens after
2722 			 * the above increment, then it will have to go
2723 			 * back to the old method of allocating the event
2724 			 * on the ring buffer, and if the filter fails, it
2725 			 * will have to call ring_buffer_discard_commit()
2726 			 * to remove it.
2727 			 *
2728 			 * Need to also check the unlikely case that the
2729 			 * length is bigger than the temp buffer size.
2730 			 * If that happens, then the reserve is pretty much
2731 			 * guaranteed to fail, as the ring buffer currently
2732 			 * only allows events less than a page. But that may
2733 			 * change in the future, so let the ring buffer reserve
2734 			 * handle the failure in that case.
2735 			 */
2736 			if (val == 1 && likely(len <= max_len)) {
2737 				trace_event_setup(entry, type, trace_ctx);
2738 				entry->array[0] = len;
2739 				/* Return with preemption disabled */
2740 				return entry;
2741 			}
2742 			this_cpu_dec(trace_buffered_event_cnt);
2743 		}
2744 		/* __trace_buffer_lock_reserve() disables preemption */
2745 		preempt_enable_notrace();
2746 	}
2747 
2748 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2749 					    trace_ctx);
2750 	/*
2751 	 * If tracing is off, but we have triggers enabled
2752 	 * we still need to look at the event data. Use the temp_buffer
2753 	 * to store the trace event for the trigger to use. It's recursive
2754 	 * safe and will not be recorded anywhere.
2755 	 */
2756 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2757 		*current_rb = temp_buffer;
2758 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2759 						    trace_ctx);
2760 	}
2761 	return entry;
2762 }
2763 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2764 
2765 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2766 static DEFINE_MUTEX(tracepoint_printk_mutex);
2767 
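/*
 * When tracepoint_printk is set (the "tp_printk" boot option, also
 * toggled through the sysctl handler below), events are additionally
 * printed to the console via output_printk().
 */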
2768 static void output_printk(struct trace_event_buffer *fbuffer)
2769 {
2770 	struct trace_event_call *event_call;
2771 	struct trace_event_file *file;
2772 	struct trace_event *event;
2773 	unsigned long flags;
2774 	struct trace_iterator *iter = tracepoint_print_iter;
2775 
2776 	/* We should never get here if iter is NULL */
2777 	if (WARN_ON_ONCE(!iter))
2778 		return;
2779 
2780 	event_call = fbuffer->trace_file->event_call;
2781 	if (!event_call || !event_call->event.funcs ||
2782 	    !event_call->event.funcs->trace)
2783 		return;
2784 
2785 	file = fbuffer->trace_file;
2786 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2787 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2788 	     !filter_match_preds(file->filter, fbuffer->entry)))
2789 		return;
2790 
2791 	event = &fbuffer->trace_file->event_call->event;
2792 
2793 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2794 	trace_seq_init(&iter->seq);
2795 	iter->ent = fbuffer->entry;
2796 	event_call->event.funcs->trace(iter, 0, event);
2797 	trace_seq_putc(&iter->seq, 0);
2798 	printk("%s", iter->seq.buffer);
2799 
2800 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2801 }
2802 
2803 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2804 			     void *buffer, size_t *lenp,
2805 			     loff_t *ppos)
2806 {
2807 	int save_tracepoint_printk;
2808 	int ret;
2809 
2810 	mutex_lock(&tracepoint_printk_mutex);
2811 	save_tracepoint_printk = tracepoint_printk;
2812 
2813 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2814 
2815 	/*
2816 	 * This will force exiting early, as tracepoint_printk
2817 	 * is always zero when tracepoint_print_iter is not allocated.
2818 	 */
2819 	if (!tracepoint_print_iter)
2820 		tracepoint_printk = 0;
2821 
2822 	if (save_tracepoint_printk == tracepoint_printk)
2823 		goto out;
2824 
2825 	if (tracepoint_printk)
2826 		static_key_enable(&tracepoint_printk_key.key);
2827 	else
2828 		static_key_disable(&tracepoint_printk_key.key);
2829 
2830  out:
2831 	mutex_unlock(&tracepoint_printk_mutex);
2832 
2833 	return ret;
2834 }
2835 
2836 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2837 {
2838 	enum event_trigger_type tt = ETT_NONE;
2839 	struct trace_event_file *file = fbuffer->trace_file;
2840 
2841 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2842 			fbuffer->entry, &tt))
2843 		goto discard;
2844 
2845 	if (static_key_false(&tracepoint_printk_key.key))
2846 		output_printk(fbuffer);
2847 
2848 	if (static_branch_unlikely(&trace_event_exports_enabled))
2849 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2850 
2851 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2852 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2853 
2854 discard:
2855 	if (tt)
2856 		event_triggers_post_call(file, tt);
2857 
2858 }
2859 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2860 
2861 /*
2862  * Skip 3:
2863  *
2864  *   trace_buffer_unlock_commit_regs()
2865  *   trace_event_buffer_commit()
2866  *   trace_event_raw_event_xxx()
2867  */
2868 # define STACK_SKIP 3
2869 
2870 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2871 				     struct trace_buffer *buffer,
2872 				     struct ring_buffer_event *event,
2873 				     unsigned int trace_ctx,
2874 				     struct pt_regs *regs)
2875 {
2876 	__buffer_unlock_commit(buffer, event);
2877 
2878 	/*
2879 	 * If regs is not set, then skip the necessary functions.
2880 	 * Note, we can still get here via blktrace, wakeup tracer
2881 	 * and mmiotrace, but that's ok if they lose a function or
2882 	 * two. They are not that meaningful.
2883 	 */
2884 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2885 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2886 }
2887 
2888 /*
2889  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2890  */
2891 void
2892 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2893 				   struct ring_buffer_event *event)
2894 {
2895 	__buffer_unlock_commit(buffer, event);
2896 }
2897 
2898 void
2899 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2900 	       parent_ip, unsigned int trace_ctx)
2901 {
2902 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2903 	struct ring_buffer_event *event;
2904 	struct ftrace_entry *entry;
2905 
2906 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2907 					    trace_ctx);
2908 	if (!event)
2909 		return;
2910 	entry	= ring_buffer_event_data(event);
2911 	entry->ip			= ip;
2912 	entry->parent_ip		= parent_ip;
2913 
2914 	if (static_branch_unlikely(&trace_function_exports_enabled))
2915 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2916 	__buffer_unlock_commit(buffer, event);
2917 }
2918 
2919 #ifdef CONFIG_STACKTRACE
2920 
2921 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2922 #define FTRACE_KSTACK_NESTING	4
2923 
2924 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2925 
2926 struct ftrace_stack {
2927 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2928 };
2929 
2930 
2931 struct ftrace_stacks {
2932 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2933 };
2934 
2935 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2936 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2937 
2938 static void __ftrace_trace_stack(struct trace_array *tr,
2939 				 struct trace_buffer *buffer,
2940 				 unsigned int trace_ctx,
2941 				 int skip, struct pt_regs *regs)
2942 {
2943 	struct ring_buffer_event *event;
2944 	unsigned int size, nr_entries;
2945 	struct ftrace_stack *fstack;
2946 	struct stack_entry *entry;
2947 	int stackidx;
2948 
2949 	/*
2950 	 * Add one, for this function and the call to stack_trace_save().
2951 	 * If regs is set, then these functions will not be in the way.
2952 	 */
2953 #ifndef CONFIG_UNWINDER_ORC
2954 	if (!regs)
2955 		skip++;
2956 #endif
2957 
2958 	preempt_disable_notrace();
2959 
2960 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2961 
2962 	/* This should never happen. If it does, yell once and skip */
2963 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2964 		goto out;
2965 
2966 	/*
2967 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2968 	 * interrupt will either see the value pre increment or post
2969 	 * increment. If the interrupt happens pre increment it will have
2970 	 * restored the counter when it returns.  We just need a barrier to
2971 	 * keep gcc from moving things around.
2972 	 */
2973 	barrier();
2974 
2975 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2976 	size = ARRAY_SIZE(fstack->calls);
2977 
2978 	if (regs) {
2979 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2980 						   size, skip);
2981 	} else {
2982 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2983 	}
2984 
2985 #ifdef CONFIG_DYNAMIC_FTRACE
2986 	/* Mark entry of stack trace as trampoline code */
2987 	if (tr->ops && tr->ops->trampoline) {
2988 		unsigned long tramp_start = tr->ops->trampoline;
2989 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
2990 		unsigned long *calls = fstack->calls;
2991 
2992 		for (int i = 0; i < nr_entries; i++) {
2993 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
2994 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
2995 		}
2996 	}
2997 #endif
2998 
2999 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3000 				    struct_size(entry, caller, nr_entries),
3001 				    trace_ctx);
3002 	if (!event)
3003 		goto out;
3004 	entry = ring_buffer_event_data(event);
3005 
3006 	entry->size = nr_entries;
3007 	memcpy(&entry->caller, fstack->calls,
3008 	       flex_array_size(entry, caller, nr_entries));
3009 
3010 	__buffer_unlock_commit(buffer, event);
3011 
3012  out:
3013 	/* Again, don't let gcc optimize things here */
3014 	barrier();
3015 	__this_cpu_dec(ftrace_stack_reserve);
3016 	preempt_enable_notrace();
3017 
3018 }
3019 
3020 static inline void ftrace_trace_stack(struct trace_array *tr,
3021 				      struct trace_buffer *buffer,
3022 				      unsigned int trace_ctx,
3023 				      int skip, struct pt_regs *regs)
3024 {
3025 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3026 		return;
3027 
3028 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3029 }
3030 
3031 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3032 		   int skip)
3033 {
3034 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3035 
3036 	if (rcu_is_watching()) {
3037 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3038 		return;
3039 	}
3040 
3041 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3042 		return;
3043 
3044 	/*
3045 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3046 	 * but if the above rcu_is_watching() failed, then the NMI
3047 	 * triggered someplace critical, and ct_irq_enter() should
3048 	 * not be called from NMI.
3049 	 */
3050 	if (unlikely(in_nmi()))
3051 		return;
3052 
3053 	ct_irq_enter_irqson();
3054 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3055 	ct_irq_exit_irqson();
3056 }
3057 
3058 /**
3059  * trace_dump_stack - record a stack back trace in the trace buffer
3060  * @skip: Number of functions to skip (helper handlers)
3061  */
3062 void trace_dump_stack(int skip)
3063 {
3064 	if (tracing_disabled || tracing_selftest_running)
3065 		return;
3066 
3067 #ifndef CONFIG_UNWINDER_ORC
3068 	/* Skip 1 to skip this function. */
3069 	skip++;
3070 #endif
3071 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3072 				tracing_gen_ctx(), skip, NULL);
3073 }
3074 EXPORT_SYMBOL_GPL(trace_dump_stack);
3075 
3076 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3077 static DEFINE_PER_CPU(int, user_stack_count);
3078 
3079 static void
3080 ftrace_trace_userstack(struct trace_array *tr,
3081 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3082 {
3083 	struct ring_buffer_event *event;
3084 	struct userstack_entry *entry;
3085 
3086 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3087 		return;
3088 
3089 	/*
3090 	 * NMIs cannot handle page faults, even with fixups.
3091 	 * Saving the user stack can (and often does) fault.
3092 	 */
3093 	if (unlikely(in_nmi()))
3094 		return;
3095 
3096 	/*
3097 	 * prevent recursion, since the user stack tracing may
3098 	 * trigger other kernel events.
3099 	 */
3100 	preempt_disable();
3101 	if (__this_cpu_read(user_stack_count))
3102 		goto out;
3103 
3104 	__this_cpu_inc(user_stack_count);
3105 
3106 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3107 					    sizeof(*entry), trace_ctx);
3108 	if (!event)
3109 		goto out_drop_count;
3110 	entry	= ring_buffer_event_data(event);
3111 
3112 	entry->tgid		= current->tgid;
3113 	memset(&entry->caller, 0, sizeof(entry->caller));
3114 
3115 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3116 	__buffer_unlock_commit(buffer, event);
3117 
3118  out_drop_count:
3119 	__this_cpu_dec(user_stack_count);
3120  out:
3121 	preempt_enable();
3122 }
3123 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3124 static void ftrace_trace_userstack(struct trace_array *tr,
3125 				   struct trace_buffer *buffer,
3126 				   unsigned int trace_ctx)
3127 {
3128 }
3129 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3130 
3131 #endif /* CONFIG_STACKTRACE */
3132 
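/*
 * The 64-bit timestamp delta is stored split across two 32-bit fields
 * of the entry: the low 32 bits in bottom_delta_ts and the high 32 bits
 * in top_delta_ts.
 */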
3133 static inline void
3134 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3135 			  unsigned long long delta)
3136 {
3137 	entry->bottom_delta_ts = delta & U32_MAX;
3138 	entry->top_delta_ts = (delta >> 32);
3139 }
3140 
3141 void trace_last_func_repeats(struct trace_array *tr,
3142 			     struct trace_func_repeats *last_info,
3143 			     unsigned int trace_ctx)
3144 {
3145 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3146 	struct func_repeats_entry *entry;
3147 	struct ring_buffer_event *event;
3148 	u64 delta;
3149 
3150 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3151 					    sizeof(*entry), trace_ctx);
3152 	if (!event)
3153 		return;
3154 
3155 	delta = ring_buffer_event_time_stamp(buffer, event) -
3156 		last_info->ts_last_call;
3157 
3158 	entry = ring_buffer_event_data(event);
3159 	entry->ip = last_info->ip;
3160 	entry->parent_ip = last_info->parent_ip;
3161 	entry->count = last_info->count;
3162 	func_repeats_set_delta_ts(entry, delta);
3163 
3164 	__buffer_unlock_commit(buffer, event);
3165 }
3166 
3167 /* created for use with alloc_percpu */
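/*
 * Four buffers per CPU, one for each context that can nest (normal,
 * softirq, irq, NMI), matching the nesting check in get_trace_buf().
 */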
3168 struct trace_buffer_struct {
3169 	int nesting;
3170 	char buffer[4][TRACE_BUF_SIZE];
3171 };
3172 
3173 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3174 
3175 /*
3176  * This allows for lockless recording.  If we're nested too deeply, then
3177  * this returns NULL.
3178  */
3179 static char *get_trace_buf(void)
3180 {
3181 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3182 
3183 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3184 		return NULL;
3185 
3186 	buffer->nesting++;
3187 
3188 	/* Interrupts must see nesting incremented before we use the buffer */
3189 	barrier();
3190 	return &buffer->buffer[buffer->nesting - 1][0];
3191 }
3192 
3193 static void put_trace_buf(void)
3194 {
3195 	/* Don't let the decrement of nesting leak before this */
3196 	barrier();
3197 	this_cpu_dec(trace_percpu_buffer->nesting);
3198 }
3199 
3200 static int alloc_percpu_trace_buffer(void)
3201 {
3202 	struct trace_buffer_struct __percpu *buffers;
3203 
3204 	if (trace_percpu_buffer)
3205 		return 0;
3206 
3207 	buffers = alloc_percpu(struct trace_buffer_struct);
3208 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3209 		return -ENOMEM;
3210 
3211 	trace_percpu_buffer = buffers;
3212 	return 0;
3213 }
3214 
3215 static int buffers_allocated;
3216 
3217 void trace_printk_init_buffers(void)
3218 {
3219 	if (buffers_allocated)
3220 		return;
3221 
3222 	if (alloc_percpu_trace_buffer())
3223 		return;
3224 
3225 	/* trace_printk() is for debug use only. Don't use it in production. */
3226 
3227 	pr_warn("\n");
3228 	pr_warn("**********************************************************\n");
3229 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3230 	pr_warn("**                                                      **\n");
3231 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3232 	pr_warn("**                                                      **\n");
3233 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3234 	pr_warn("** unsafe for production use.                           **\n");
3235 	pr_warn("**                                                      **\n");
3236 	pr_warn("** If you see this message and you are not debugging    **\n");
3237 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3238 	pr_warn("**                                                      **\n");
3239 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3240 	pr_warn("**********************************************************\n");
3241 
3242 	/* Expand the buffers to set size */
3243 	tracing_update_buffers(&global_trace);
3244 
3245 	buffers_allocated = 1;
3246 
3247 	/*
3248 	 * trace_printk_init_buffers() can be called by modules.
3249 	 * If that happens, then we need to start cmdline recording
3250 	 * directly here. If the global_trace.buffer is already
3251 	 * allocated here, then this was called by module code.
3252 	 */
3253 	if (global_trace.array_buffer.buffer)
3254 		tracing_start_cmdline_record();
3255 }
3256 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3257 
3258 void trace_printk_start_comm(void)
3259 {
3260 	/* Start tracing comms if trace printk is set */
3261 	if (!buffers_allocated)
3262 		return;
3263 	tracing_start_cmdline_record();
3264 }
3265 
3266 static void trace_printk_start_stop_comm(int enabled)
3267 {
3268 	if (!buffers_allocated)
3269 		return;
3270 
3271 	if (enabled)
3272 		tracing_start_cmdline_record();
3273 	else
3274 		tracing_stop_cmdline_record();
3275 }
3276 
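/*
 * Unlike trace_vprintk(), the format is not expanded here: only the
 * @fmt pointer and the binary arguments produced by vbin_printf() are
 * recorded, and the string is rendered when the buffer is read. The
 * printk_binsafe() check below falls back to trace_vprintk() when
 * storing a bare format pointer would not be safe.
 */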
3277 /**
3278  * trace_vbprintk - write binary msg to tracing buffer
3279  * @ip:    The address of the caller
3280  * @fmt:   The string format to write to the buffer
3281  * @args:  Arguments for @fmt
3282  */
3283 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3284 {
3285 	struct ring_buffer_event *event;
3286 	struct trace_buffer *buffer;
3287 	struct trace_array *tr = READ_ONCE(printk_trace);
3288 	struct bprint_entry *entry;
3289 	unsigned int trace_ctx;
3290 	char *tbuffer;
3291 	int len = 0, size;
3292 
3293 	if (!printk_binsafe(tr))
3294 		return trace_vprintk(ip, fmt, args);
3295 
3296 	if (unlikely(tracing_selftest_running || tracing_disabled))
3297 		return 0;
3298 
3299 	/* Don't pollute graph traces with trace_vprintk internals */
3300 	pause_graph_tracing();
3301 
3302 	trace_ctx = tracing_gen_ctx();
3303 	preempt_disable_notrace();
3304 
3305 	tbuffer = get_trace_buf();
3306 	if (!tbuffer) {
3307 		len = 0;
3308 		goto out_nobuffer;
3309 	}
3310 
3311 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3312 
3313 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3314 		goto out_put;
3315 
3316 	size = sizeof(*entry) + sizeof(u32) * len;
3317 	buffer = tr->array_buffer.buffer;
3318 	ring_buffer_nest_start(buffer);
3319 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3320 					    trace_ctx);
3321 	if (!event)
3322 		goto out;
3323 	entry = ring_buffer_event_data(event);
3324 	entry->ip			= ip;
3325 	entry->fmt			= fmt;
3326 
3327 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3328 	__buffer_unlock_commit(buffer, event);
3329 	ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3330 
3331 out:
3332 	ring_buffer_nest_end(buffer);
3333 out_put:
3334 	put_trace_buf();
3335 
3336 out_nobuffer:
3337 	preempt_enable_notrace();
3338 	unpause_graph_tracing();
3339 
3340 	return len;
3341 }
3342 EXPORT_SYMBOL_GPL(trace_vbprintk);
3343 
3344 __printf(3, 0)
3345 static int
3346 __trace_array_vprintk(struct trace_buffer *buffer,
3347 		      unsigned long ip, const char *fmt, va_list args)
3348 {
3349 	struct ring_buffer_event *event;
3350 	int len = 0, size;
3351 	struct print_entry *entry;
3352 	unsigned int trace_ctx;
3353 	char *tbuffer;
3354 
3355 	if (tracing_disabled)
3356 		return 0;
3357 
3358 	/* Don't pollute graph traces with trace_vprintk internals */
3359 	pause_graph_tracing();
3360 
3361 	trace_ctx = tracing_gen_ctx();
3362 	preempt_disable_notrace();
3363 
3364 
3365 	tbuffer = get_trace_buf();
3366 	if (!tbuffer) {
3367 		len = 0;
3368 		goto out_nobuffer;
3369 	}
3370 
3371 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3372 
3373 	size = sizeof(*entry) + len + 1;
3374 	ring_buffer_nest_start(buffer);
3375 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3376 					    trace_ctx);
3377 	if (!event)
3378 		goto out;
3379 	entry = ring_buffer_event_data(event);
3380 	entry->ip = ip;
3381 
3382 	memcpy(&entry->buf, tbuffer, len + 1);
3383 	__buffer_unlock_commit(buffer, event);
3384 	ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3385 
3386 out:
3387 	ring_buffer_nest_end(buffer);
3388 	put_trace_buf();
3389 
3390 out_nobuffer:
3391 	preempt_enable_notrace();
3392 	unpause_graph_tracing();
3393 
3394 	return len;
3395 }
3396 
3397 __printf(3, 0)
3398 int trace_array_vprintk(struct trace_array *tr,
3399 			unsigned long ip, const char *fmt, va_list args)
3400 {
3401 	if (tracing_selftest_running && tr == &global_trace)
3402 		return 0;
3403 
3404 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3405 }
3406 
3407 /**
3408  * trace_array_printk - Print a message to a specific instance
3409  * @tr: The instance trace_array descriptor
3410  * @ip: The instruction pointer that this is called from.
3411  * @fmt: The format to print (printf format)
3412  *
3413  * If a subsystem sets up its own instance, it has the right to
3414  * printk strings into its tracing instance buffer using this
3415  * function. Note, this function will not write into the top level
3416  * buffer (use trace_printk() for that), as the top level buffer
3417  * should only contain events that can be individually disabled.
3418  * trace_printk() is only used for debugging a kernel, and should
3419  * never be incorporated into normal use.
3420  *
3421  * trace_array_printk() can be used, as it will not add noise to the
3422  * top level tracing buffer.
3423  *
3424  * Note, trace_array_init_printk() must be called on @tr before this
3425  * can be used.
3426  */
3427 __printf(3, 0)
3428 int trace_array_printk(struct trace_array *tr,
3429 		       unsigned long ip, const char *fmt, ...)
3430 {
3431 	int ret;
3432 	va_list ap;
3433 
3434 	if (!tr)
3435 		return -ENOENT;
3436 
3437 	/* This is only allowed for created instances */
3438 	if (tr == &global_trace)
3439 		return 0;
3440 
3441 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3442 		return 0;
3443 
3444 	va_start(ap, fmt);
3445 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3446 	va_end(ap);
3447 	return ret;
3448 }
3449 EXPORT_SYMBOL_GPL(trace_array_printk);
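/*
 * Typical usage from a subsystem that owns an instance @tr (illustrative):
 *
 *	if (trace_array_init_printk(tr))
 *		return;
 *	trace_array_printk(tr, _THIS_IP_, "state changed to %d\n", state);
 */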
3450 
3451 /**
3452  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3453  * @tr: The trace array to initialize the buffers for
3454  *
3455  * As trace_array_printk() only writes into instances, calls to it are
3456  * OK to have in the kernel (unlike trace_printk()). This needs to be
3457  * called before trace_array_printk() can be used on a trace_array.
3458  */
3459 int trace_array_init_printk(struct trace_array *tr)
3460 {
3461 	if (!tr)
3462 		return -ENOENT;
3463 
3464 	/* This is only allowed for created instances */
3465 	if (tr == &global_trace)
3466 		return -EINVAL;
3467 
3468 	return alloc_percpu_trace_buffer();
3469 }
3470 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3471 
3472 __printf(3, 4)
3473 int trace_array_printk_buf(struct trace_buffer *buffer,
3474 			   unsigned long ip, const char *fmt, ...)
3475 {
3476 	int ret;
3477 	va_list ap;
3478 
3479 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3480 		return 0;
3481 
3482 	va_start(ap, fmt);
3483 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3484 	va_end(ap);
3485 	return ret;
3486 }
3487 
3488 __printf(2, 0)
3489 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3490 {
3491 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3492 }
3493 EXPORT_SYMBOL_GPL(trace_vprintk);
3494 
3495 static void trace_iterator_increment(struct trace_iterator *iter)
3496 {
3497 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3498 
3499 	iter->idx++;
3500 	if (buf_iter)
3501 		ring_buffer_iter_advance(buf_iter);
3502 }
3503 
3504 static struct trace_entry *
3505 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3506 		unsigned long *lost_events)
3507 {
3508 	struct ring_buffer_event *event;
3509 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3510 
3511 	if (buf_iter) {
3512 		event = ring_buffer_iter_peek(buf_iter, ts);
3513 		if (lost_events)
3514 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3515 				(unsigned long)-1 : 0;
3516 	} else {
3517 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3518 					 lost_events);
3519 	}
3520 
3521 	if (event) {
3522 		iter->ent_size = ring_buffer_event_length(event);
3523 		return ring_buffer_event_data(event);
3524 	}
3525 	iter->ent_size = 0;
3526 	return NULL;
3527 }
3528 
3529 static struct trace_entry *
3530 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3531 		  unsigned long *missing_events, u64 *ent_ts)
3532 {
3533 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3534 	struct trace_entry *ent, *next = NULL;
3535 	unsigned long lost_events = 0, next_lost = 0;
3536 	int cpu_file = iter->cpu_file;
3537 	u64 next_ts = 0, ts;
3538 	int next_cpu = -1;
3539 	int next_size = 0;
3540 	int cpu;
3541 
3542 	/*
3543 	 * If we are in a per_cpu trace file, don't bother iterating over
3544 	 * all CPUs; just peek at that one directly.
3545 	 */
3546 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3547 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3548 			return NULL;
3549 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3550 		if (ent_cpu)
3551 			*ent_cpu = cpu_file;
3552 
3553 		return ent;
3554 	}
3555 
3556 	for_each_tracing_cpu(cpu) {
3557 
3558 		if (ring_buffer_empty_cpu(buffer, cpu))
3559 			continue;
3560 
3561 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3562 
3563 		/*
3564 		 * Pick the entry with the smallest timestamp:
3565 		 */
3566 		if (ent && (!next || ts < next_ts)) {
3567 			next = ent;
3568 			next_cpu = cpu;
3569 			next_ts = ts;
3570 			next_lost = lost_events;
3571 			next_size = iter->ent_size;
3572 		}
3573 	}
3574 
3575 	iter->ent_size = next_size;
3576 
3577 	if (ent_cpu)
3578 		*ent_cpu = next_cpu;
3579 
3580 	if (ent_ts)
3581 		*ent_ts = next_ts;
3582 
3583 	if (missing_events)
3584 		*missing_events = next_lost;
3585 
3586 	return next;
3587 }
3588 
3589 #define STATIC_FMT_BUF_SIZE	128
3590 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3591 
3592 char *trace_iter_expand_format(struct trace_iterator *iter)
3593 {
3594 	char *tmp;
3595 
3596 	/*
3597 	 * iter->tr is NULL when used with tp_printk, which means
3598 	 * this can get called where it is not safe to call krealloc().
3599 	 */
3600 	if (!iter->tr || iter->fmt == static_fmt_buf)
3601 		return NULL;
3602 
3603 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3604 		       GFP_KERNEL);
3605 	if (tmp) {
3606 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3607 		iter->fmt = tmp;
3608 	}
3609 
3610 	return tmp;
3611 }
3612 
3613 /* Returns true if the string is safe to dereference from an event */
3614 static bool trace_safe_str(struct trace_iterator *iter, const char *str)
3615 {
3616 	unsigned long addr = (unsigned long)str;
3617 	struct trace_event *trace_event;
3618 	struct trace_event_call *event;
3619 
3620 	/* OK if part of the event data */
3621 	if ((addr >= (unsigned long)iter->ent) &&
3622 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3623 		return true;
3624 
3625 	/* OK if part of the temp seq buffer */
3626 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3627 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3628 		return true;
3629 
3630 	/* Core rodata cannot be freed */
3631 	if (is_kernel_rodata(addr))
3632 		return true;
3633 
3634 	if (trace_is_tracepoint_string(str))
3635 		return true;
3636 
3637 	/*
3638 	 * Now this could be a module event, referencing core module
3639 	 * data, which is OK.
3640 	 */
3641 	if (!iter->ent)
3642 		return false;
3643 
3644 	trace_event = ftrace_find_event(iter->ent->type);
3645 	if (!trace_event)
3646 		return false;
3647 
3648 	event = container_of(trace_event, struct trace_event_call, event);
3649 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3650 		return false;
3651 
3652 	/* Would rather have rodata, but this will suffice */
3653 	if (within_module_core(addr, event->module))
3654 		return true;
3655 
3656 	return false;
3657 }
3658 
3659 /**
3660  * ignore_event - Check dereferenced fields while writing to the seq buffer
3661  * @iter: The iterator that holds the seq buffer and the event being printed
3662  *
3663  * At boot up, test_event_printk() will flag any event that dereferences
3664  * a string with "%s" that does not exist in the ring buffer. It may still
3665  * be valid, as the string may point to a static string in the kernel
3666  * rodata that never gets freed. But if the string pointer is pointing
3667  * to something that was allocated, there's a chance that it can be freed
3668  * by the time the user reads the trace. This would cause a bad memory
3669  * access by the kernel and possibly crash the system.
3670  *
3671  * This function will check if the event has any fields flagged as needing
3672  * to be checked at runtime and perform those checks.
3673  *
3674  * If it is found that a field is unsafe, it will write into the @iter->seq
3675  * a message stating what was found to be unsafe.
3676  *
3677  * @return: true if the event is unsafe and should be ignored,
3678  *          false otherwise.
3679  */
3680 bool ignore_event(struct trace_iterator *iter)
3681 {
3682 	struct ftrace_event_field *field;
3683 	struct trace_event *trace_event;
3684 	struct trace_event_call *event;
3685 	struct list_head *head;
3686 	struct trace_seq *seq;
3687 	const void *ptr;
3688 
3689 	trace_event = ftrace_find_event(iter->ent->type);
3690 
3691 	seq = &iter->seq;
3692 
3693 	if (!trace_event) {
3694 		trace_seq_printf(seq, "EVENT ID %d NOT FOUND?\n", iter->ent->type);
3695 		return true;
3696 	}
3697 
3698 	event = container_of(trace_event, struct trace_event_call, event);
3699 	if (!(event->flags & TRACE_EVENT_FL_TEST_STR))
3700 		return false;
3701 
3702 	head = trace_get_fields(event);
3703 	if (!head) {
3704 		trace_seq_printf(seq, "FIELDS FOR EVENT '%s' NOT FOUND?\n",
3705 				 trace_event_name(event));
3706 		return true;
3707 	}
3708 
3709 	/* Offsets are from the iter->ent that points to the raw event */
3710 	ptr = iter->ent;
3711 
3712 	list_for_each_entry(field, head, link) {
3713 		const char *str;
3714 		bool good;
3715 
3716 		if (!field->needs_test)
3717 			continue;
3718 
3719 		str = *(const char **)(ptr + field->offset);
3720 
3721 		good = trace_safe_str(iter, str);
3722 
3723 		/*
3724 		 * If you hit this warning, it is likely that the
3725 		 * trace event in question used %s on a string that
3726 		 * was saved at the time of the event, but may not be
3727 		 * around when the trace is read. Use __string(),
3728 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3729 		 * instead. See samples/trace_events/trace-events-sample.h
3730 		 * for reference.
3731 		 */
3732 		if (WARN_ONCE(!good, "event '%s' has unsafe pointer field '%s'",
3733 			      trace_event_name(event), field->name)) {
3734 			trace_seq_printf(seq, "EVENT %s: HAS UNSAFE POINTER FIELD '%s'\n",
3735 					 trace_event_name(event), field->name);
3736 			return true;
3737 		}
3738 	}
3739 	return false;
3740 }
3741 
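/*
 * Illustrative sketch (editor's addition, not part of the kernel source):
 * the safe pattern suggested by the warning in ignore_event() above.
 * Instead of recording a bare "const char *" and printing it with "%s",
 * copy the string into the event with the __string()/__assign_str()/
 * __get_str() helpers (see samples/trace_events/trace-events-sample.h).
 * The event name and argument below are hypothetical, the definition
 * belongs in a trace header rather than in this file, and older kernels
 * use the two-argument form __assign_str(name, src):
 *
 *	TRACE_EVENT(sample_name_change,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */
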
3742 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3743 {
3744 	const char *p, *new_fmt;
3745 	char *q;
3746 
3747 	if (WARN_ON_ONCE(!fmt))
3748 		return fmt;
3749 
3750 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3751 		return fmt;
3752 
3753 	p = fmt;
3754 	new_fmt = q = iter->fmt;
3755 	while (*p) {
3756 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3757 			if (!trace_iter_expand_format(iter))
3758 				return fmt;
3759 
3760 			q += iter->fmt - new_fmt;
3761 			new_fmt = iter->fmt;
3762 		}
3763 
3764 		*q++ = *p++;
3765 
3766 		/* Replace %p with %px */
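		/* e.g. "count=%d ptr=%p" becomes "count=%d ptr=%px"; "%%p" stays as-is */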
3767 		if (p[-1] == '%') {
3768 			if (p[0] == '%') {
3769 				*q++ = *p++;
3770 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3771 				*q++ = *p++;
3772 				*q++ = 'x';
3773 			}
3774 		}
3775 	}
3776 	*q = '\0';
3777 
3778 	return new_fmt;
3779 }
3780 
3781 #define STATIC_TEMP_BUF_SIZE	128
3782 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3783 
3784 /* Find the next real entry, without updating the iterator itself */
3785 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3786 					  int *ent_cpu, u64 *ent_ts)
3787 {
3788 	/* __find_next_entry will reset ent_size */
3789 	int ent_size = iter->ent_size;
3790 	struct trace_entry *entry;
3791 
3792 	/*
3793 	 * If called from ftrace_dump(), then the iter->temp buffer
3794 	 * will be the static_temp_buf and not created from kmalloc.
3795 	 * If the entry size is greater than the buffer, we cannot
3796 	 * save it. Just return NULL in that case. This is only
3797 	 * used to add markers when two consecutive events' time
3798 	 * stamps have a large delta. See trace_print_lat_context().
3799 	 */
3800 	if (iter->temp == static_temp_buf &&
3801 	    STATIC_TEMP_BUF_SIZE < ent_size)
3802 		return NULL;
3803 
3804 	/*
3805 	 * The __find_next_entry() may call peek_next_entry(), which may
3806 	 * call ring_buffer_peek() that may make the contents of iter->ent
3807 	 * undefined. Need to copy iter->ent now.
3808 	 */
3809 	if (iter->ent && iter->ent != iter->temp) {
3810 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3811 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3812 			void *temp;
3813 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3814 			if (!temp)
3815 				return NULL;
3816 			kfree(iter->temp);
3817 			iter->temp = temp;
3818 			iter->temp_size = iter->ent_size;
3819 		}
3820 		memcpy(iter->temp, iter->ent, iter->ent_size);
3821 		iter->ent = iter->temp;
3822 	}
3823 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3824 	/* Put back the original ent_size */
3825 	iter->ent_size = ent_size;
3826 
3827 	return entry;
3828 }
3829 
3830 /* Find the next real entry, and increment the iterator to the next entry */
3831 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3832 {
3833 	iter->ent = __find_next_entry(iter, &iter->cpu,
3834 				      &iter->lost_events, &iter->ts);
3835 
3836 	if (iter->ent)
3837 		trace_iterator_increment(iter);
3838 
3839 	return iter->ent ? iter : NULL;
3840 }
3841 
3842 static void trace_consume(struct trace_iterator *iter)
3843 {
3844 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3845 			    &iter->lost_events);
3846 }
3847 
3848 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3849 {
3850 	struct trace_iterator *iter = m->private;
3851 	int i = (int)*pos;
3852 	void *ent;
3853 
3854 	WARN_ON_ONCE(iter->leftover);
3855 
3856 	(*pos)++;
3857 
3858 	/* can't go backwards */
3859 	if (iter->idx > i)
3860 		return NULL;
3861 
3862 	if (iter->idx < 0)
3863 		ent = trace_find_next_entry_inc(iter);
3864 	else
3865 		ent = iter;
3866 
3867 	while (ent && iter->idx < i)
3868 		ent = trace_find_next_entry_inc(iter);
3869 
3870 	iter->pos = *pos;
3871 
3872 	return ent;
3873 }
3874 
3875 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
3876 {
3877 	struct ring_buffer_iter *buf_iter;
3878 	unsigned long entries = 0;
3879 	u64 ts;
3880 
3881 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
3882 
3883 	buf_iter = trace_buffer_iter(iter, cpu);
3884 	if (!buf_iter)
3885 		return;
3886 
3887 	ring_buffer_iter_reset(buf_iter);
3888 
3889 	/*
3890 	 * We could have the case with the max latency tracers
3891 	 * that a reset never took place on a cpu. This is evident
3892 	 * by the timestamp being before the start of the buffer.
3893 	 */
3894 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
3895 		if (ts >= iter->array_buffer->time_start)
3896 			break;
3897 		entries++;
3898 		ring_buffer_iter_advance(buf_iter);
3899 		/* This could be a big loop */
3900 		cond_resched();
3901 	}
3902 
3903 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
3904 }
3905 
3906 /*
3907  * The current tracer is copied to avoid global locking
3908  * all around.
3909  */
3910 static void *s_start(struct seq_file *m, loff_t *pos)
3911 {
3912 	struct trace_iterator *iter = m->private;
3913 	struct trace_array *tr = iter->tr;
3914 	int cpu_file = iter->cpu_file;
3915 	void *p = NULL;
3916 	loff_t l = 0;
3917 	int cpu;
3918 
3919 	mutex_lock(&trace_types_lock);
3920 	if (unlikely(tr->current_trace != iter->trace)) {
3921 		/* Close iter->trace before switching to the new current tracer */
3922 		if (iter->trace->close)
3923 			iter->trace->close(iter);
3924 		iter->trace = tr->current_trace;
3925 		/* Reopen the new current tracer */
3926 		if (iter->trace->open)
3927 			iter->trace->open(iter);
3928 	}
3929 	mutex_unlock(&trace_types_lock);
3930 
3931 #ifdef CONFIG_TRACER_MAX_TRACE
3932 	if (iter->snapshot && iter->trace->use_max_tr)
3933 		return ERR_PTR(-EBUSY);
3934 #endif
3935 
3936 	if (*pos != iter->pos) {
3937 		iter->ent = NULL;
3938 		iter->cpu = 0;
3939 		iter->idx = -1;
3940 
3941 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
3942 			for_each_tracing_cpu(cpu)
3943 				tracing_iter_reset(iter, cpu);
3944 		} else
3945 			tracing_iter_reset(iter, cpu_file);
3946 
3947 		iter->leftover = 0;
3948 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
3949 			;
3950 
3951 	} else {
3952 		/*
3953 		 * If we overflowed the seq_file before, then we want
3954 		 * to just reuse the trace_seq buffer again.
3955 		 */
3956 		if (iter->leftover)
3957 			p = iter;
3958 		else {
3959 			l = *pos - 1;
3960 			p = s_next(m, p, &l);
3961 		}
3962 	}
3963 
3964 	trace_event_read_lock();
3965 	trace_access_lock(cpu_file);
3966 	return p;
3967 }
3968 
3969 static void s_stop(struct seq_file *m, void *p)
3970 {
3971 	struct trace_iterator *iter = m->private;
3972 
3973 #ifdef CONFIG_TRACER_MAX_TRACE
3974 	if (iter->snapshot && iter->trace->use_max_tr)
3975 		return;
3976 #endif
3977 
3978 	trace_access_unlock(iter->cpu_file);
3979 	trace_event_read_unlock();
3980 }
3981 
3982 static void
3983 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
3984 		      unsigned long *entries, int cpu)
3985 {
3986 	unsigned long count;
3987 
3988 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
3989 	/*
3990 	 * If this buffer has skipped entries, then we hold all
3991 	 * entries for the trace and we need to ignore the
3992 	 * ones before the time stamp.
3993 	 */
3994 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
3995 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
3996 		/* total is the same as the entries */
3997 		*total = count;
3998 	} else
3999 		*total = count +
4000 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4001 	*entries = count;
4002 }
4003 
4004 static void
4005 get_total_entries(struct array_buffer *buf,
4006 		  unsigned long *total, unsigned long *entries)
4007 {
4008 	unsigned long t, e;
4009 	int cpu;
4010 
4011 	*total = 0;
4012 	*entries = 0;
4013 
4014 	for_each_tracing_cpu(cpu) {
4015 		get_total_entries_cpu(buf, &t, &e, cpu);
4016 		*total += t;
4017 		*entries += e;
4018 	}
4019 }
4020 
4021 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4022 {
4023 	unsigned long total, entries;
4024 
4025 	if (!tr)
4026 		tr = &global_trace;
4027 
4028 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4029 
4030 	return entries;
4031 }
4032 
4033 unsigned long trace_total_entries(struct trace_array *tr)
4034 {
4035 	unsigned long total, entries;
4036 
4037 	if (!tr)
4038 		tr = &global_trace;
4039 
4040 	get_total_entries(&tr->array_buffer, &total, &entries);
4041 
4042 	return entries;
4043 }
4044 
4045 static void print_lat_help_header(struct seq_file *m)
4046 {
4047 	seq_puts(m, "#                    _------=> CPU#            \n"
4048 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4049 		    "#                  | / _----=> need-resched    \n"
4050 		    "#                  || / _---=> hardirq/softirq \n"
4051 		    "#                  ||| / _--=> preempt-depth   \n"
4052 		    "#                  |||| / _-=> migrate-disable \n"
4053 		    "#                  ||||| /     delay           \n"
4054 		    "#  cmd     pid     |||||| time  |   caller     \n"
4055 		    "#     \\   /        ||||||  \\    |    /       \n");
4056 }
4057 
4058 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4059 {
4060 	unsigned long total;
4061 	unsigned long entries;
4062 
4063 	get_total_entries(buf, &total, &entries);
4064 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4065 		   entries, total, num_online_cpus());
4066 	seq_puts(m, "#\n");
4067 }
4068 
4069 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4070 				   unsigned int flags)
4071 {
4072 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4073 
4074 	print_event_info(buf, m);
4075 
4076 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4077 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4078 }
4079 
4080 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4081 				       unsigned int flags)
4082 {
4083 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4084 	static const char space[] = "            ";
4085 	int prec = tgid ? 12 : 2;
4086 
4087 	print_event_info(buf, m);
4088 
4089 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4090 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4091 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4092 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4093 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4094 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4095 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4096 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4097 }
4098 
4099 void
4100 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4101 {
4102 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4103 	struct array_buffer *buf = iter->array_buffer;
4104 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4105 	struct tracer *type = iter->trace;
4106 	unsigned long entries;
4107 	unsigned long total;
4108 	const char *name = type->name;
4109 
4110 	get_total_entries(buf, &total, &entries);
4111 
4112 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4113 		   name, init_utsname()->release);
4114 	seq_puts(m, "# -----------------------------------"
4115 		 "---------------------------------\n");
4116 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4117 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4118 		   nsecs_to_usecs(data->saved_latency),
4119 		   entries,
4120 		   total,
4121 		   buf->cpu,
4122 		   preempt_model_none()      ? "server" :
4123 		   preempt_model_voluntary() ? "desktop" :
4124 		   preempt_model_full()      ? "preempt" :
4125 		   preempt_model_rt()        ? "preempt_rt" :
4126 		   "unknown",
4127 		   /* These are reserved for later use */
4128 		   0, 0, 0, 0);
4129 #ifdef CONFIG_SMP
4130 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4131 #else
4132 	seq_puts(m, ")\n");
4133 #endif
4134 	seq_puts(m, "#    -----------------\n");
4135 	seq_printf(m, "#    | task: %.16s-%d "
4136 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4137 		   data->comm, data->pid,
4138 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4139 		   data->policy, data->rt_priority);
4140 	seq_puts(m, "#    -----------------\n");
4141 
4142 	if (data->critical_start) {
4143 		seq_puts(m, "#  => started at: ");
4144 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4145 		trace_print_seq(m, &iter->seq);
4146 		seq_puts(m, "\n#  => ended at:   ");
4147 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4148 		trace_print_seq(m, &iter->seq);
4149 		seq_puts(m, "\n#\n");
4150 	}
4151 
4152 	seq_puts(m, "#\n");
4153 }
4154 
4155 static void test_cpu_buff_start(struct trace_iterator *iter)
4156 {
4157 	struct trace_seq *s = &iter->seq;
4158 	struct trace_array *tr = iter->tr;
4159 
4160 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4161 		return;
4162 
4163 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4164 		return;
4165 
4166 	if (cpumask_available(iter->started) &&
4167 	    cpumask_test_cpu(iter->cpu, iter->started))
4168 		return;
4169 
4170 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4171 		return;
4172 
4173 	if (cpumask_available(iter->started))
4174 		cpumask_set_cpu(iter->cpu, iter->started);
4175 
4176 	/* Don't print started cpu buffer for the first entry of the trace */
4177 	if (iter->idx > 1)
4178 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4179 				iter->cpu);
4180 }
4181 
4182 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4183 {
4184 	struct trace_array *tr = iter->tr;
4185 	struct trace_seq *s = &iter->seq;
4186 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4187 	struct trace_entry *entry;
4188 	struct trace_event *event;
4189 
4190 	entry = iter->ent;
4191 
4192 	test_cpu_buff_start(iter);
4193 
4194 	event = ftrace_find_event(entry->type);
4195 
4196 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4197 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4198 			trace_print_lat_context(iter);
4199 		else
4200 			trace_print_context(iter);
4201 	}
4202 
4203 	if (trace_seq_has_overflowed(s))
4204 		return TRACE_TYPE_PARTIAL_LINE;
4205 
4206 	if (event) {
4207 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4208 			return print_event_fields(iter, event);
4209 		/*
4210 		 * For TRACE_EVENT() events, the print_fmt is not
4211 		 * safe to use if the array has delta offsets.
4212 		 * Force printing via the fields.
4213 		 */
4214 		if ((tr->text_delta || tr->data_delta) &&
4215 		    event->type > __TRACE_LAST_TYPE)
4216 			return print_event_fields(iter, event);
4217 
4218 		return event->funcs->trace(iter, sym_flags, event);
4219 	}
4220 
4221 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4222 
4223 	return trace_handle_return(s);
4224 }
4225 
4226 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4227 {
4228 	struct trace_array *tr = iter->tr;
4229 	struct trace_seq *s = &iter->seq;
4230 	struct trace_entry *entry;
4231 	struct trace_event *event;
4232 
4233 	entry = iter->ent;
4234 
4235 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4236 		trace_seq_printf(s, "%d %d %llu ",
4237 				 entry->pid, iter->cpu, iter->ts);
4238 
4239 	if (trace_seq_has_overflowed(s))
4240 		return TRACE_TYPE_PARTIAL_LINE;
4241 
4242 	event = ftrace_find_event(entry->type);
4243 	if (event)
4244 		return event->funcs->raw(iter, 0, event);
4245 
4246 	trace_seq_printf(s, "%d ?\n", entry->type);
4247 
4248 	return trace_handle_return(s);
4249 }
4250 
4251 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4252 {
4253 	struct trace_array *tr = iter->tr;
4254 	struct trace_seq *s = &iter->seq;
4255 	unsigned char newline = '\n';
4256 	struct trace_entry *entry;
4257 	struct trace_event *event;
4258 
4259 	entry = iter->ent;
4260 
4261 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4262 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4263 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4264 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4265 		if (trace_seq_has_overflowed(s))
4266 			return TRACE_TYPE_PARTIAL_LINE;
4267 	}
4268 
4269 	event = ftrace_find_event(entry->type);
4270 	if (event) {
4271 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4272 		if (ret != TRACE_TYPE_HANDLED)
4273 			return ret;
4274 	}
4275 
4276 	SEQ_PUT_FIELD(s, newline);
4277 
4278 	return trace_handle_return(s);
4279 }
4280 
4281 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4282 {
4283 	struct trace_array *tr = iter->tr;
4284 	struct trace_seq *s = &iter->seq;
4285 	struct trace_entry *entry;
4286 	struct trace_event *event;
4287 
4288 	entry = iter->ent;
4289 
4290 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4291 		SEQ_PUT_FIELD(s, entry->pid);
4292 		SEQ_PUT_FIELD(s, iter->cpu);
4293 		SEQ_PUT_FIELD(s, iter->ts);
4294 		if (trace_seq_has_overflowed(s))
4295 			return TRACE_TYPE_PARTIAL_LINE;
4296 	}
4297 
4298 	event = ftrace_find_event(entry->type);
4299 	return event ? event->funcs->binary(iter, 0, event) :
4300 		TRACE_TYPE_HANDLED;
4301 }
4302 
4303 int trace_empty(struct trace_iterator *iter)
4304 {
4305 	struct ring_buffer_iter *buf_iter;
4306 	int cpu;
4307 
4308 	/* If we are looking at one CPU buffer, only check that one */
4309 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4310 		cpu = iter->cpu_file;
4311 		buf_iter = trace_buffer_iter(iter, cpu);
4312 		if (buf_iter) {
4313 			if (!ring_buffer_iter_empty(buf_iter))
4314 				return 0;
4315 		} else {
4316 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4317 				return 0;
4318 		}
4319 		return 1;
4320 	}
4321 
4322 	for_each_tracing_cpu(cpu) {
4323 		buf_iter = trace_buffer_iter(iter, cpu);
4324 		if (buf_iter) {
4325 			if (!ring_buffer_iter_empty(buf_iter))
4326 				return 0;
4327 		} else {
4328 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4329 				return 0;
4330 		}
4331 	}
4332 
4333 	return 1;
4334 }
4335 
4336 /*  Called with trace_event_read_lock() held. */
4337 enum print_line_t print_trace_line(struct trace_iterator *iter)
4338 {
4339 	struct trace_array *tr = iter->tr;
4340 	unsigned long trace_flags = tr->trace_flags;
4341 	enum print_line_t ret;
4342 
4343 	if (iter->lost_events) {
4344 		if (iter->lost_events == (unsigned long)-1)
4345 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4346 					 iter->cpu);
4347 		else
4348 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4349 					 iter->cpu, iter->lost_events);
4350 		if (trace_seq_has_overflowed(&iter->seq))
4351 			return TRACE_TYPE_PARTIAL_LINE;
4352 	}
4353 
4354 	if (iter->trace && iter->trace->print_line) {
4355 		ret = iter->trace->print_line(iter);
4356 		if (ret != TRACE_TYPE_UNHANDLED)
4357 			return ret;
4358 	}
4359 
4360 	if (iter->ent->type == TRACE_BPUTS &&
4361 			trace_flags & TRACE_ITER_PRINTK &&
4362 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4363 		return trace_print_bputs_msg_only(iter);
4364 
4365 	if (iter->ent->type == TRACE_BPRINT &&
4366 			trace_flags & TRACE_ITER_PRINTK &&
4367 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4368 		return trace_print_bprintk_msg_only(iter);
4369 
4370 	if (iter->ent->type == TRACE_PRINT &&
4371 			trace_flags & TRACE_ITER_PRINTK &&
4372 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4373 		return trace_print_printk_msg_only(iter);
4374 
4375 	if (trace_flags & TRACE_ITER_BIN)
4376 		return print_bin_fmt(iter);
4377 
4378 	if (trace_flags & TRACE_ITER_HEX)
4379 		return print_hex_fmt(iter);
4380 
4381 	if (trace_flags & TRACE_ITER_RAW)
4382 		return print_raw_fmt(iter);
4383 
4384 	return print_trace_fmt(iter);
4385 }
4386 
4387 void trace_latency_header(struct seq_file *m)
4388 {
4389 	struct trace_iterator *iter = m->private;
4390 	struct trace_array *tr = iter->tr;
4391 
4392 	/* print nothing if the buffers are empty */
4393 	if (trace_empty(iter))
4394 		return;
4395 
4396 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4397 		print_trace_header(m, iter);
4398 
4399 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4400 		print_lat_help_header(m);
4401 }
4402 
4403 void trace_default_header(struct seq_file *m)
4404 {
4405 	struct trace_iterator *iter = m->private;
4406 	struct trace_array *tr = iter->tr;
4407 	unsigned long trace_flags = tr->trace_flags;
4408 
4409 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4410 		return;
4411 
4412 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4413 		/* print nothing if the buffers are empty */
4414 		if (trace_empty(iter))
4415 			return;
4416 		print_trace_header(m, iter);
4417 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4418 			print_lat_help_header(m);
4419 	} else {
4420 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4421 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4422 				print_func_help_header_irq(iter->array_buffer,
4423 							   m, trace_flags);
4424 			else
4425 				print_func_help_header(iter->array_buffer, m,
4426 						       trace_flags);
4427 		}
4428 	}
4429 }
4430 
4431 static void test_ftrace_alive(struct seq_file *m)
4432 {
4433 	if (!ftrace_is_dead())
4434 		return;
4435 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4436 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4437 }
4438 
4439 #ifdef CONFIG_TRACER_MAX_TRACE
4440 static void show_snapshot_main_help(struct seq_file *m)
4441 {
4442 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4443 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4444 		    "#                      Takes a snapshot of the main buffer.\n"
4445 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4446 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4447 		    "#                       is not a '0' or '1')\n");
4448 }
4449 
4450 static void show_snapshot_percpu_help(struct seq_file *m)
4451 {
4452 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4453 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4454 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4455 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4456 #else
4457 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4458 		    "#                     Must use main snapshot file to allocate.\n");
4459 #endif
4460 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4461 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4462 		    "#                       is not a '0' or '1')\n");
4463 }
4464 
4465 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4466 {
4467 	if (iter->tr->allocated_snapshot)
4468 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4469 	else
4470 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4471 
4472 	seq_puts(m, "# Snapshot commands:\n");
4473 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4474 		show_snapshot_main_help(m);
4475 	else
4476 		show_snapshot_percpu_help(m);
4477 }
4478 #else
4479 /* Should never be called */
4480 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4481 #endif
4482 
4483 static int s_show(struct seq_file *m, void *v)
4484 {
4485 	struct trace_iterator *iter = v;
4486 	int ret;
4487 
4488 	if (iter->ent == NULL) {
4489 		if (iter->tr) {
4490 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4491 			seq_puts(m, "#\n");
4492 			test_ftrace_alive(m);
4493 		}
4494 		if (iter->snapshot && trace_empty(iter))
4495 			print_snapshot_help(m, iter);
4496 		else if (iter->trace && iter->trace->print_header)
4497 			iter->trace->print_header(m);
4498 		else
4499 			trace_default_header(m);
4500 
4501 	} else if (iter->leftover) {
4502 		/*
4503 		 * If we filled the seq_file buffer earlier, we
4504 		 * want to just show it now.
4505 		 */
4506 		ret = trace_print_seq(m, &iter->seq);
4507 
4508 		/* ret should this time be zero, but you never know */
4509 		iter->leftover = ret;
4510 
4511 	} else {
4512 		ret = print_trace_line(iter);
4513 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4514 			iter->seq.full = 0;
4515 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4516 		}
4517 		ret = trace_print_seq(m, &iter->seq);
4518 		/*
4519 		 * If we overflow the seq_file buffer, then it will
4520 		 * ask us for this data again at the next s_start().
4521 		 * Use that instead.
4522 		 *  ret is 0 if seq_file write succeeded.
4523 		 *        -1 otherwise.
4524 		 */
4525 		iter->leftover = ret;
4526 	}
4527 
4528 	return 0;
4529 }
4530 
4531 /*
4532  * Should be used after trace_array_get(); trace_types_lock
4533  * ensures that i_cdev was already initialized.
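 * trace_create_cpu_file() stores (cpu + 1) in i_cdev, so a NULL i_cdev
 * means the file is not per-CPU and RING_BUFFER_ALL_CPUS is returned.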
4534  */
4535 static inline int tracing_get_cpu(struct inode *inode)
4536 {
4537 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4538 		return (long)inode->i_cdev - 1;
4539 	return RING_BUFFER_ALL_CPUS;
4540 }
4541 
4542 static const struct seq_operations tracer_seq_ops = {
4543 	.start		= s_start,
4544 	.next		= s_next,
4545 	.stop		= s_stop,
4546 	.show		= s_show,
4547 };
4548 
4549 /*
4550  * Note, as iter itself can be allocated and freed in different
4551  * ways, this function is only used to free its content, and not
4552  * the iterator itself. The only requirement for all the allocations
4553  * is that they must zero all fields (kzalloc), as freeing works with
4554  * either allocated content or NULL.
4555  */
4556 static void free_trace_iter_content(struct trace_iterator *iter)
4557 {
4558 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4559 	if (iter->fmt != static_fmt_buf)
4560 		kfree(iter->fmt);
4561 
4562 	kfree(iter->temp);
4563 	kfree(iter->buffer_iter);
4564 	mutex_destroy(&iter->mutex);
4565 	free_cpumask_var(iter->started);
4566 }
4567 
4568 static struct trace_iterator *
4569 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4570 {
4571 	struct trace_array *tr = inode->i_private;
4572 	struct trace_iterator *iter;
4573 	int cpu;
4574 
4575 	if (tracing_disabled)
4576 		return ERR_PTR(-ENODEV);
4577 
4578 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4579 	if (!iter)
4580 		return ERR_PTR(-ENOMEM);
4581 
4582 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4583 				    GFP_KERNEL);
4584 	if (!iter->buffer_iter)
4585 		goto release;
4586 
4587 	/*
4588 	 * trace_find_next_entry() may need to save off iter->ent.
4589 	 * It will place it into the iter->temp buffer. As most
4590 	 * events are less than 128 bytes, allocate a buffer of that size.
4591 	 * If one is greater, then trace_find_next_entry() will
4592 	 * allocate a new buffer to adjust for the bigger iter->ent.
4593 	 * It's not critical if it fails to get allocated here.
4594 	 */
4595 	iter->temp = kmalloc(128, GFP_KERNEL);
4596 	if (iter->temp)
4597 		iter->temp_size = 128;
4598 
4599 	/*
4600 	 * trace_event_printf() may need to modify the given format
4601 	 * string to replace %p with %px so that it shows the real address
4602 	 * instead of a hash value. However, that is only needed for event
4603 	 * tracing; other tracers may not need it. Defer the allocation
4604 	 * until it is needed.
4605 	 */
4606 	iter->fmt = NULL;
4607 	iter->fmt_size = 0;
4608 
4609 	mutex_lock(&trace_types_lock);
4610 	iter->trace = tr->current_trace;
4611 
4612 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4613 		goto fail;
4614 
4615 	iter->tr = tr;
4616 
4617 #ifdef CONFIG_TRACER_MAX_TRACE
4618 	/* Currently only the top directory has a snapshot */
4619 	if (tr->current_trace->print_max || snapshot)
4620 		iter->array_buffer = &tr->max_buffer;
4621 	else
4622 #endif
4623 		iter->array_buffer = &tr->array_buffer;
4624 	iter->snapshot = snapshot;
4625 	iter->pos = -1;
4626 	iter->cpu_file = tracing_get_cpu(inode);
4627 	mutex_init(&iter->mutex);
4628 
4629 	/* Notify the tracer early; before we stop tracing. */
4630 	if (iter->trace->open)
4631 		iter->trace->open(iter);
4632 
4633 	/* Annotate start of buffers if we had overruns */
4634 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4635 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4636 
4637 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4638 	if (trace_clocks[tr->clock_id].in_ns)
4639 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4640 
4641 	/*
4642 	 * If pause-on-trace is enabled, then stop the trace while
4643 	 * dumping, unless this is the "snapshot" file
4644 	 */
4645 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4646 		tracing_stop_tr(tr);
4647 
4648 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4649 		for_each_tracing_cpu(cpu) {
4650 			iter->buffer_iter[cpu] =
4651 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4652 							 cpu, GFP_KERNEL);
4653 		}
4654 		ring_buffer_read_prepare_sync();
4655 		for_each_tracing_cpu(cpu) {
4656 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4657 			tracing_iter_reset(iter, cpu);
4658 		}
4659 	} else {
4660 		cpu = iter->cpu_file;
4661 		iter->buffer_iter[cpu] =
4662 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4663 						 cpu, GFP_KERNEL);
4664 		ring_buffer_read_prepare_sync();
4665 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4666 		tracing_iter_reset(iter, cpu);
4667 	}
4668 
4669 	mutex_unlock(&trace_types_lock);
4670 
4671 	return iter;
4672 
4673  fail:
4674 	mutex_unlock(&trace_types_lock);
4675 	free_trace_iter_content(iter);
4676 release:
4677 	seq_release_private(inode, file);
4678 	return ERR_PTR(-ENOMEM);
4679 }
4680 
4681 int tracing_open_generic(struct inode *inode, struct file *filp)
4682 {
4683 	int ret;
4684 
4685 	ret = tracing_check_open_get_tr(NULL);
4686 	if (ret)
4687 		return ret;
4688 
4689 	filp->private_data = inode->i_private;
4690 	return 0;
4691 }
4692 
4693 bool tracing_is_disabled(void)
4694 {
4695 	return (tracing_disabled) ? true: false;
4696 	return (tracing_disabled) ? true : false;
4697 
4698 /*
4699  * Open and update trace_array ref count.
4700  * Must have the current trace_array passed to it.
4701  */
4702 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4703 {
4704 	struct trace_array *tr = inode->i_private;
4705 	int ret;
4706 
4707 	ret = tracing_check_open_get_tr(tr);
4708 	if (ret)
4709 		return ret;
4710 
4711 	filp->private_data = inode->i_private;
4712 
4713 	return 0;
4714 }
4715 
4716 /*
4717  * The private pointer of the inode is the trace_event_file.
4718  * Update the tr ref count associated with it.
4719  */
4720 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4721 {
4722 	struct trace_event_file *file = inode->i_private;
4723 	int ret;
4724 
4725 	ret = tracing_check_open_get_tr(file->tr);
4726 	if (ret)
4727 		return ret;
4728 
4729 	mutex_lock(&event_mutex);
4730 
4731 	/* Fail if the file is marked for removal */
4732 	if (file->flags & EVENT_FILE_FL_FREED) {
4733 		trace_array_put(file->tr);
4734 		ret = -ENODEV;
4735 	} else {
4736 		event_file_get(file);
4737 	}
4738 
4739 	mutex_unlock(&event_mutex);
4740 	if (ret)
4741 		return ret;
4742 
4743 	filp->private_data = inode->i_private;
4744 
4745 	return 0;
4746 }
4747 
4748 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4749 {
4750 	struct trace_event_file *file = inode->i_private;
4751 
4752 	trace_array_put(file->tr);
4753 	event_file_put(file);
4754 
4755 	return 0;
4756 }
4757 
4758 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4759 {
4760 	tracing_release_file_tr(inode, filp);
4761 	return single_release(inode, filp);
4762 }
4763 
4764 static int tracing_mark_open(struct inode *inode, struct file *filp)
4765 {
4766 	stream_open(inode, filp);
4767 	return tracing_open_generic_tr(inode, filp);
4768 }
4769 
4770 static int tracing_release(struct inode *inode, struct file *file)
4771 {
4772 	struct trace_array *tr = inode->i_private;
4773 	struct seq_file *m = file->private_data;
4774 	struct trace_iterator *iter;
4775 	int cpu;
4776 
4777 	if (!(file->f_mode & FMODE_READ)) {
4778 		trace_array_put(tr);
4779 		return 0;
4780 	}
4781 
4782 	/* Writes do not use seq_file */
4783 	iter = m->private;
4784 	mutex_lock(&trace_types_lock);
4785 
4786 	for_each_tracing_cpu(cpu) {
4787 		if (iter->buffer_iter[cpu])
4788 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4789 	}
4790 
4791 	if (iter->trace && iter->trace->close)
4792 		iter->trace->close(iter);
4793 
4794 	if (!iter->snapshot && tr->stop_count)
4795 		/* reenable tracing if it was previously enabled */
4796 		tracing_start_tr(tr);
4797 
4798 	__trace_array_put(tr);
4799 
4800 	mutex_unlock(&trace_types_lock);
4801 
4802 	free_trace_iter_content(iter);
4803 	seq_release_private(inode, file);
4804 
4805 	return 0;
4806 }
4807 
4808 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4809 {
4810 	struct trace_array *tr = inode->i_private;
4811 
4812 	trace_array_put(tr);
4813 	return 0;
4814 }
4815 
4816 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4817 {
4818 	struct trace_array *tr = inode->i_private;
4819 
4820 	trace_array_put(tr);
4821 
4822 	return single_release(inode, file);
4823 }
4824 
4825 static int tracing_open(struct inode *inode, struct file *file)
4826 {
4827 	struct trace_array *tr = inode->i_private;
4828 	struct trace_iterator *iter;
4829 	int ret;
4830 
4831 	ret = tracing_check_open_get_tr(tr);
4832 	if (ret)
4833 		return ret;
4834 
4835 	/* If this file was open for write, then erase contents */
4836 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4837 		int cpu = tracing_get_cpu(inode);
4838 		struct array_buffer *trace_buf = &tr->array_buffer;
4839 
4840 #ifdef CONFIG_TRACER_MAX_TRACE
4841 		if (tr->current_trace->print_max)
4842 			trace_buf = &tr->max_buffer;
4843 #endif
4844 
4845 		if (cpu == RING_BUFFER_ALL_CPUS)
4846 			tracing_reset_online_cpus(trace_buf);
4847 		else
4848 			tracing_reset_cpu(trace_buf, cpu);
4849 	}
4850 
4851 	if (file->f_mode & FMODE_READ) {
4852 		iter = __tracing_open(inode, file, false);
4853 		if (IS_ERR(iter))
4854 			ret = PTR_ERR(iter);
4855 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4856 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4857 	}
4858 
4859 	if (ret < 0)
4860 		trace_array_put(tr);
4861 
4862 	return ret;
4863 }
4864 
4865 /*
4866  * Some tracers are not suitable for instance buffers.
4867  * A tracer is always available for the global array (toplevel)
4868  * or if it explicitly states that it is.
4869  */
4870 static bool
4871 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
4872 {
4873 #ifdef CONFIG_TRACER_SNAPSHOT
4874 	/* arrays with mapped buffer range do not have snapshots */
4875 	if (tr->range_addr_start && t->use_max_tr)
4876 		return false;
4877 #endif
4878 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
4879 }
4880 
4881 /* Find the next tracer that this trace array may use */
4882 static struct tracer *
4883 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
4884 {
4885 	while (t && !trace_ok_for_array(t, tr))
4886 		t = t->next;
4887 
4888 	return t;
4889 }
4890 
4891 static void *
4892 t_next(struct seq_file *m, void *v, loff_t *pos)
4893 {
4894 	struct trace_array *tr = m->private;
4895 	struct tracer *t = v;
4896 
4897 	(*pos)++;
4898 
4899 	if (t)
4900 		t = get_tracer_for_array(tr, t->next);
4901 
4902 	return t;
4903 }
4904 
4905 static void *t_start(struct seq_file *m, loff_t *pos)
4906 {
4907 	struct trace_array *tr = m->private;
4908 	struct tracer *t;
4909 	loff_t l = 0;
4910 
4911 	mutex_lock(&trace_types_lock);
4912 
4913 	t = get_tracer_for_array(tr, trace_types);
4914 	for (; t && l < *pos; t = t_next(m, t, &l))
4915 			;
4916 
4917 	return t;
4918 }
4919 
4920 static void t_stop(struct seq_file *m, void *p)
4921 {
4922 	mutex_unlock(&trace_types_lock);
4923 }
4924 
4925 static int t_show(struct seq_file *m, void *v)
4926 {
4927 	struct tracer *t = v;
4928 
4929 	if (!t)
4930 		return 0;
4931 
4932 	seq_puts(m, t->name);
4933 	if (t->next)
4934 		seq_putc(m, ' ');
4935 	else
4936 		seq_putc(m, '\n');
4937 
4938 	return 0;
4939 }
4940 
4941 static const struct seq_operations show_traces_seq_ops = {
4942 	.start		= t_start,
4943 	.next		= t_next,
4944 	.stop		= t_stop,
4945 	.show		= t_show,
4946 };
4947 
4948 static int show_traces_open(struct inode *inode, struct file *file)
4949 {
4950 	struct trace_array *tr = inode->i_private;
4951 	struct seq_file *m;
4952 	int ret;
4953 
4954 	ret = tracing_check_open_get_tr(tr);
4955 	if (ret)
4956 		return ret;
4957 
4958 	ret = seq_open(file, &show_traces_seq_ops);
4959 	if (ret) {
4960 		trace_array_put(tr);
4961 		return ret;
4962 	}
4963 
4964 	m = file->private_data;
4965 	m->private = tr;
4966 
4967 	return 0;
4968 }
4969 
4970 static int tracing_seq_release(struct inode *inode, struct file *file)
4971 {
4972 	struct trace_array *tr = inode->i_private;
4973 
4974 	trace_array_put(tr);
4975 	return seq_release(inode, file);
4976 }
4977 
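/*
 * Writes to the "trace" file are accepted but discarded here; the actual
 * clearing of the buffer happens in tracing_open() when the file is
 * opened with O_TRUNC (e.g. "echo > trace").
 */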
4978 static ssize_t
4979 tracing_write_stub(struct file *filp, const char __user *ubuf,
4980 		   size_t count, loff_t *ppos)
4981 {
4982 	return count;
4983 }
4984 
4985 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
4986 {
4987 	int ret;
4988 
4989 	if (file->f_mode & FMODE_READ)
4990 		ret = seq_lseek(file, offset, whence);
4991 	else
4992 		file->f_pos = ret = 0;
4993 
4994 	return ret;
4995 }
4996 
4997 static const struct file_operations tracing_fops = {
4998 	.open		= tracing_open,
4999 	.read		= seq_read,
5000 	.read_iter	= seq_read_iter,
5001 	.splice_read	= copy_splice_read,
5002 	.write		= tracing_write_stub,
5003 	.llseek		= tracing_lseek,
5004 	.release	= tracing_release,
5005 };
5006 
5007 static const struct file_operations show_traces_fops = {
5008 	.open		= show_traces_open,
5009 	.read		= seq_read,
5010 	.llseek		= seq_lseek,
5011 	.release	= tracing_seq_release,
5012 };
5013 
5014 static ssize_t
5015 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5016 		     size_t count, loff_t *ppos)
5017 {
5018 	struct trace_array *tr = file_inode(filp)->i_private;
5019 	char *mask_str;
5020 	int len;
5021 
5022 	len = snprintf(NULL, 0, "%*pb\n",
5023 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5024 	mask_str = kmalloc(len, GFP_KERNEL);
5025 	if (!mask_str)
5026 		return -ENOMEM;
5027 
5028 	len = snprintf(mask_str, len, "%*pb\n",
5029 		       cpumask_pr_args(tr->tracing_cpumask));
5030 	if (len >= count) {
5031 		count = -EINVAL;
5032 		goto out_err;
5033 	}
5034 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5035 
5036 out_err:
5037 	kfree(mask_str);
5038 
5039 	return count;
5040 }
5041 
5042 int tracing_set_cpumask(struct trace_array *tr,
5043 			cpumask_var_t tracing_cpumask_new)
5044 {
5045 	int cpu;
5046 
5047 	if (!tr)
5048 		return -EINVAL;
5049 
5050 	local_irq_disable();
5051 	arch_spin_lock(&tr->max_lock);
5052 	for_each_tracing_cpu(cpu) {
5053 		/*
5054 		 * Increase/decrease the disabled counter if we are
5055 		 * about to flip a bit in the cpumask:
5056 		 */
5057 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5058 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5059 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5060 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5061 #ifdef CONFIG_TRACER_MAX_TRACE
5062 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5063 #endif
5064 		}
5065 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5066 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5067 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5068 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5069 #ifdef CONFIG_TRACER_MAX_TRACE
5070 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5071 #endif
5072 		}
5073 	}
5074 	arch_spin_unlock(&tr->max_lock);
5075 	local_irq_enable();
5076 
5077 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5078 
5079 	return 0;
5080 }
5081 
5082 static ssize_t
5083 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5084 		      size_t count, loff_t *ppos)
5085 {
5086 	struct trace_array *tr = file_inode(filp)->i_private;
5087 	cpumask_var_t tracing_cpumask_new;
5088 	int err;
5089 
5090 	if (count == 0 || count > KMALLOC_MAX_SIZE)
5091 		return -EINVAL;
5092 
5093 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5094 		return -ENOMEM;
5095 
5096 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5097 	if (err)
5098 		goto err_free;
5099 
5100 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5101 	if (err)
5102 		goto err_free;
5103 
5104 	free_cpumask_var(tracing_cpumask_new);
5105 
5106 	return count;
5107 
5108 err_free:
5109 	free_cpumask_var(tracing_cpumask_new);
5110 
5111 	return err;
5112 }
5113 
5114 static const struct file_operations tracing_cpumask_fops = {
5115 	.open		= tracing_open_generic_tr,
5116 	.read		= tracing_cpumask_read,
5117 	.write		= tracing_cpumask_write,
5118 	.release	= tracing_release_generic_tr,
5119 	.llseek		= generic_file_llseek,
5120 };
5121 
5122 static int tracing_trace_options_show(struct seq_file *m, void *v)
5123 {
5124 	struct tracer_opt *trace_opts;
5125 	struct trace_array *tr = m->private;
5126 	u32 tracer_flags;
5127 	int i;
5128 
5129 	mutex_lock(&trace_types_lock);
5130 	tracer_flags = tr->current_trace->flags->val;
5131 	trace_opts = tr->current_trace->flags->opts;
5132 
5133 	for (i = 0; trace_options[i]; i++) {
5134 		if (tr->trace_flags & (1 << i))
5135 			seq_printf(m, "%s\n", trace_options[i]);
5136 		else
5137 			seq_printf(m, "no%s\n", trace_options[i]);
5138 	}
5139 
5140 	for (i = 0; trace_opts[i].name; i++) {
5141 		if (tracer_flags & trace_opts[i].bit)
5142 			seq_printf(m, "%s\n", trace_opts[i].name);
5143 		else
5144 			seq_printf(m, "no%s\n", trace_opts[i].name);
5145 	}
5146 	mutex_unlock(&trace_types_lock);
5147 
5148 	return 0;
5149 }
5150 
5151 static int __set_tracer_option(struct trace_array *tr,
5152 			       struct tracer_flags *tracer_flags,
5153 			       struct tracer_opt *opts, int neg)
5154 {
5155 	struct tracer *trace = tracer_flags->trace;
5156 	int ret;
5157 
5158 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5159 	if (ret)
5160 		return ret;
5161 
5162 	if (neg)
5163 		tracer_flags->val &= ~opts->bit;
5164 	else
5165 		tracer_flags->val |= opts->bit;
5166 	return 0;
5167 }
5168 
5169 /* Try to assign a tracer specific option */
5170 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5171 {
5172 	struct tracer *trace = tr->current_trace;
5173 	struct tracer_flags *tracer_flags = trace->flags;
5174 	struct tracer_opt *opts = NULL;
5175 	int i;
5176 
5177 	for (i = 0; tracer_flags->opts[i].name; i++) {
5178 		opts = &tracer_flags->opts[i];
5179 
5180 		if (strcmp(cmp, opts->name) == 0)
5181 			return __set_tracer_option(tr, trace->flags, opts, neg);
5182 	}
5183 
5184 	return -EINVAL;
5185 }
5186 
5187 /* Some tracers require overwrite to stay enabled */
5188 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5189 {
5190 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5191 		return -1;
5192 
5193 	return 0;
5194 }
5195 
5196 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5197 {
5198 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5199 	    (mask == TRACE_ITER_RECORD_CMD) ||
5200 	    (mask == TRACE_ITER_TRACE_PRINTK))
5201 		lockdep_assert_held(&event_mutex);
5202 
5203 	/* do nothing if flag is already set */
5204 	if (!!(tr->trace_flags & mask) == !!enabled)
5205 		return 0;
5206 
5207 	/* Give the tracer a chance to approve the change */
5208 	if (tr->current_trace->flag_changed)
5209 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5210 			return -EINVAL;
5211 
5212 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5213 		if (enabled) {
5214 			update_printk_trace(tr);
5215 		} else {
5216 			/*
5217 			 * The global_trace cannot clear this.
5218 			 * Its flag only gets cleared if another instance sets it.
5219 			 */
5220 			if (printk_trace == &global_trace)
5221 				return -EINVAL;
5222 			/*
5223 			 * An instance must always have it set;
5224 			 * by default, that's the global_trace instance.
5225 			 */
5226 			if (printk_trace == tr)
5227 				update_printk_trace(&global_trace);
5228 		}
5229 	}
5230 
5231 	if (enabled)
5232 		tr->trace_flags |= mask;
5233 	else
5234 		tr->trace_flags &= ~mask;
5235 
5236 	if (mask == TRACE_ITER_RECORD_CMD)
5237 		trace_event_enable_cmd_record(enabled);
5238 
5239 	if (mask == TRACE_ITER_RECORD_TGID) {
5240 
5241 		if (trace_alloc_tgid_map() < 0) {
5242 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5243 			return -ENOMEM;
5244 		}
5245 
5246 		trace_event_enable_tgid_record(enabled);
5247 	}
5248 
5249 	if (mask == TRACE_ITER_EVENT_FORK)
5250 		trace_event_follow_fork(tr, enabled);
5251 
5252 	if (mask == TRACE_ITER_FUNC_FORK)
5253 		ftrace_pid_follow_fork(tr, enabled);
5254 
5255 	if (mask == TRACE_ITER_OVERWRITE) {
5256 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5257 #ifdef CONFIG_TRACER_MAX_TRACE
5258 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5259 #endif
5260 	}
5261 
5262 	if (mask == TRACE_ITER_PRINTK) {
5263 		trace_printk_start_stop_comm(enabled);
5264 		trace_printk_control(enabled);
5265 	}
5266 
5267 	return 0;
5268 }
5269 
trace_set_options(struct trace_array * tr,char * option)5270 int trace_set_options(struct trace_array *tr, char *option)
5271 {
5272 	char *cmp;
5273 	int neg = 0;
5274 	int ret;
5275 	size_t orig_len = strlen(option);
5276 	int len;
5277 
5278 	cmp = strstrip(option);
5279 
5280 	len = str_has_prefix(cmp, "no");
5281 	if (len)
5282 		neg = 1;
5283 
5284 	cmp += len;
5285 
5286 	mutex_lock(&event_mutex);
5287 	mutex_lock(&trace_types_lock);
5288 
5289 	ret = match_string(trace_options, -1, cmp);
5290 	/* If no option could be set, test the specific tracer options */
5291 	if (ret < 0)
5292 		ret = set_tracer_option(tr, cmp, neg);
5293 	else
5294 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5295 
5296 	mutex_unlock(&trace_types_lock);
5297 	mutex_unlock(&event_mutex);
5298 
5299 	/*
5300 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5301 	 * turn it back into a space.
5302 	 */
5303 	if (orig_len > strlen(option))
5304 		option[strlen(option)] = ' ';
5305 
5306 	return ret;
5307 }
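
/*
 * Usage sketch for the write side (assuming tracefs is mounted at
 * /sys/kernel/tracing): a single option name sets the flag, the same
 * name prefixed with "no" clears it, and names that are not core
 * options fall through to the current tracer's own options.
 *
 *	# echo nooverwrite > trace_options
 *	# echo overwrite > trace_options
 */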
5308 
apply_trace_boot_options(void)5309 static void __init apply_trace_boot_options(void)
5310 {
5311 	char *buf = trace_boot_options_buf;
5312 	char *option;
5313 
5314 	while (true) {
5315 		option = strsep(&buf, ",");
5316 
5317 		if (!option)
5318 			break;
5319 
5320 		if (*option)
5321 			trace_set_options(&global_trace, option);
5322 
5323 		/* Put back the comma to allow this to be called again */
5324 		if (buf)
5325 			*(buf - 1) = ',';
5326 	}
5327 }
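
/*
 * The buffer parsed above is filled from the "trace_options=" kernel
 * command line parameter, e.g. (an illustrative boot line, not a
 * requirement):
 *
 *	trace_options=sym-offset,noirq-info
 *
 * The comma written back after each strsep() keeps the buffer intact
 * so it can be parsed again later.
 */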
5328 
5329 static ssize_t
tracing_trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)5330 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5331 			size_t cnt, loff_t *ppos)
5332 {
5333 	struct seq_file *m = filp->private_data;
5334 	struct trace_array *tr = m->private;
5335 	char buf[64];
5336 	int ret;
5337 
5338 	if (cnt >= sizeof(buf))
5339 		return -EINVAL;
5340 
5341 	if (copy_from_user(buf, ubuf, cnt))
5342 		return -EFAULT;
5343 
5344 	buf[cnt] = 0;
5345 
5346 	ret = trace_set_options(tr, buf);
5347 	if (ret < 0)
5348 		return ret;
5349 
5350 	*ppos += cnt;
5351 
5352 	return cnt;
5353 }
5354 
tracing_trace_options_open(struct inode * inode,struct file * file)5355 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5356 {
5357 	struct trace_array *tr = inode->i_private;
5358 	int ret;
5359 
5360 	ret = tracing_check_open_get_tr(tr);
5361 	if (ret)
5362 		return ret;
5363 
5364 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5365 	if (ret < 0)
5366 		trace_array_put(tr);
5367 
5368 	return ret;
5369 }
5370 
5371 static const struct file_operations tracing_iter_fops = {
5372 	.open		= tracing_trace_options_open,
5373 	.read		= seq_read,
5374 	.llseek		= seq_lseek,
5375 	.release	= tracing_single_release_tr,
5376 	.write		= tracing_trace_options_write,
5377 };
5378 
5379 static const char readme_msg[] =
5380 	"tracing mini-HOWTO:\n\n"
5381 	"By default tracefs removes all OTH file permission bits.\n"
5382 	"When mounting tracefs an optional group id can be specified\n"
5383 	"which adds the group to every directory and file in tracefs:\n\n"
5384 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5385 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5386 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5387 	" Important files:\n"
5388 	"  trace\t\t\t- The static contents of the buffer\n"
5389 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5390 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5391 	"  current_tracer\t- function and latency tracers\n"
5392 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5393 	"  error_log\t- error log for failed commands (that support it)\n"
5394 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5395 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5396 	"  trace_clock\t\t- change the clock used to order events\n"
5397 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5398 	"      global:   Synced across CPUs but slows tracing down.\n"
5399 	"     counter:   Not a clock, but just an increment\n"
5400 	"      uptime:   Jiffy counter from time of boot\n"
5401 	"        perf:   Same clock that perf events use\n"
5402 #ifdef CONFIG_X86_64
5403 	"     x86-tsc:   TSC cycle counter\n"
5404 #endif
5405 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5406 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5407 	"    absolute:   Absolute (standalone) timestamp\n"
5408 	"\n  trace_marker\t\t- Writes to this file are inserted into the kernel buffer\n"
5409 	"\n  trace_marker_raw\t\t- Writes to this file insert binary data into the kernel buffer\n"
5410 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5411 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5412 	"\t\t\t  Remove sub-buffer with rmdir\n"
5413 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5414 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5415 	"\t\t\t  option name\n"
5416 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5417 #ifdef CONFIG_DYNAMIC_FTRACE
5418 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5419 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5420 	"\t\t\t  functions\n"
5421 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5422 	"\t     modules: Can select a group via module\n"
5423 	"\t      Format: :mod:<module-name>\n"
5424 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5425 	"\t    triggers: a command to perform when function is hit\n"
5426 	"\t      Format: <function>:<trigger>[:count]\n"
5427 	"\t     trigger: traceon, traceoff\n"
5428 	"\t\t      enable_event:<system>:<event>\n"
5429 	"\t\t      disable_event:<system>:<event>\n"
5430 #ifdef CONFIG_STACKTRACE
5431 	"\t\t      stacktrace\n"
5432 #endif
5433 #ifdef CONFIG_TRACER_SNAPSHOT
5434 	"\t\t      snapshot\n"
5435 #endif
5436 	"\t\t      dump\n"
5437 	"\t\t      cpudump\n"
5438 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5439 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5440 	"\t     The first one will disable tracing every time do_fault is hit\n"
5441 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5442 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5443 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5444 	"\t       the counter will not decrement. It only decrements when the\n"
5445 	"\t       trigger did work\n"
5446 	"\t     To remove trigger without count:\n"
5447 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5448 	"\t     To remove trigger with a count:\n"
5449 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5450 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5451 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5452 	"\t    modules: Can select a group via module command :mod:\n"
5453 	"\t    Does not accept triggers\n"
5454 #endif /* CONFIG_DYNAMIC_FTRACE */
5455 #ifdef CONFIG_FUNCTION_TRACER
5456 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5457 	"\t\t    (function)\n"
5458 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5459 	"\t\t    (function)\n"
5460 #endif
5461 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5462 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5463 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5464 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5465 #endif
5466 #ifdef CONFIG_TRACER_SNAPSHOT
5467 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5468 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5469 	"\t\t\t  information\n"
5470 #endif
5471 #ifdef CONFIG_STACK_TRACER
5472 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5473 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5474 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5475 	"\t\t\t  new trace)\n"
5476 #ifdef CONFIG_DYNAMIC_FTRACE
5477 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5478 	"\t\t\t  traces\n"
5479 #endif
5480 #endif /* CONFIG_STACK_TRACER */
5481 #ifdef CONFIG_DYNAMIC_EVENTS
5482 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5483 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5484 #endif
5485 #ifdef CONFIG_KPROBE_EVENTS
5486 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5487 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5488 #endif
5489 #ifdef CONFIG_UPROBE_EVENTS
5490 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5491 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5492 #endif
5493 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5494     defined(CONFIG_FPROBE_EVENTS)
5495 	"\t  accepts: event-definitions (one definition per line)\n"
5496 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5497 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5498 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5499 #endif
5500 #ifdef CONFIG_FPROBE_EVENTS
5501 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5502 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5503 #endif
5504 #ifdef CONFIG_HIST_TRIGGERS
5505 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5506 #endif
5507 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5508 	"\t           -:[<group>/][<event>]\n"
5509 #ifdef CONFIG_KPROBE_EVENTS
5510 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5511   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5512 #endif
5513 #ifdef CONFIG_UPROBE_EVENTS
5514   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5515 #endif
5516 	"\t     args: <name>=fetcharg[:type]\n"
5517 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5518 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5519 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5520 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5521 	"\t           <argname>[->field[->field|.field...]],\n"
5522 #endif
5523 #else
5524 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5525 #endif
5526 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5527 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5528 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5529 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5530 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5531 #ifdef CONFIG_HIST_TRIGGERS
5532 	"\t    field: <stype> <name>;\n"
5533 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5534 	"\t           [unsigned] char/int/long\n"
5535 #endif
5536 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5537 	"\t            of the <attached-group>/<attached-event>.\n"
5538 #endif
5539 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5540 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5541 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5542 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5543 	"\t\t\t  events\n"
5544 	"      filter\t\t- If set, only events passing filter are traced\n"
5545 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5546 	"\t\t\t  <event>:\n"
5547 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5548 	"      filter\t\t- If set, only events passing filter are traced\n"
5549 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5550 	"\t    Format: <trigger>[:count][if <filter>]\n"
5551 	"\t   trigger: traceon, traceoff\n"
5552 	"\t            enable_event:<system>:<event>\n"
5553 	"\t            disable_event:<system>:<event>\n"
5554 #ifdef CONFIG_HIST_TRIGGERS
5555 	"\t            enable_hist:<system>:<event>\n"
5556 	"\t            disable_hist:<system>:<event>\n"
5557 #endif
5558 #ifdef CONFIG_STACKTRACE
5559 	"\t\t    stacktrace\n"
5560 #endif
5561 #ifdef CONFIG_TRACER_SNAPSHOT
5562 	"\t\t    snapshot\n"
5563 #endif
5564 #ifdef CONFIG_HIST_TRIGGERS
5565 	"\t\t    hist (see below)\n"
5566 #endif
5567 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5568 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5569 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5570 	"\t                  events/block/block_unplug/trigger\n"
5571 	"\t   The first disables tracing every time block_unplug is hit.\n"
5572 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5573 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5574 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5575 	"\t   Like function triggers, the counter is only decremented if it\n"
5576 	"\t    enabled or disabled tracing.\n"
5577 	"\t   To remove a trigger without a count:\n"
5578 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5579 	"\t   To remove a trigger with a count:\n"
5580 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5581 	"\t   Filters can be ignored when removing a trigger.\n"
5582 #ifdef CONFIG_HIST_TRIGGERS
5583 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5584 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5585 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5586 	"\t            [:values=<field1[,field2,...]>]\n"
5587 	"\t            [:sort=<field1[,field2,...]>]\n"
5588 	"\t            [:size=#entries]\n"
5589 	"\t            [:pause][:continue][:clear]\n"
5590 	"\t            [:name=histname1]\n"
5591 	"\t            [:nohitcount]\n"
5592 	"\t            [:<handler>.<action>]\n"
5593 	"\t            [if <filter>]\n\n"
5594 	"\t    Note, special fields can be used as well:\n"
5595 	"\t            common_timestamp - to record current timestamp\n"
5596 	"\t            common_cpu - to record the CPU the event happened on\n"
5597 	"\n"
5598 	"\t    A hist trigger variable can be:\n"
5599 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5600 	"\t        - a reference to another variable e.g. y=$x,\n"
5601 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5602 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5603 	"\n"
5604 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5605 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5606 	"\t    variable reference, field or numeric literal.\n"
5607 	"\n"
5608 	"\t    When a matching event is hit, an entry is added to a hash\n"
5609 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5610 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5611 	"\t    correspond to fields in the event's format description.  Keys\n"
5612 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5613 	"\t    Compound keys consisting of up to two fields can be specified\n"
5614 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5615 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5616 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5617 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5618 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5619 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5620 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5621 	"\t    its histogram data will be shared with other triggers of the\n"
5622 	"\t    same name, and trigger hits will update this common data.\n\n"
5623 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5624 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5625 	"\t    triggers attached to an event, there will be a table for each\n"
5626 	"\t    trigger in the output.  The table displayed for a named\n"
5627 	"\t    trigger will be the same as any other instance having the\n"
5628 	"\t    same name.  The default format used to display a given field\n"
5629 	"\t    can be modified by appending any of the following modifiers\n"
5630 	"\t    to the field name, as applicable:\n\n"
5631 	"\t            .hex        display a number as a hex value\n"
5632 	"\t            .sym        display an address as a symbol\n"
5633 	"\t            .sym-offset display an address as a symbol and offset\n"
5634 	"\t            .execname   display a common_pid as a program name\n"
5635 	"\t            .syscall    display a syscall id as a syscall name\n"
5636 	"\t            .log2       display log2 value rather than raw number\n"
5637 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5638 	"\t            .usecs      display a common_timestamp in microseconds\n"
5639 	"\t            .percent    display a number as a percentage value\n"
5640 	"\t            .graph      display a bar-graph of a value\n\n"
5641 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5642 	"\t    trigger or to start a hist trigger but not log any events\n"
5643 	"\t    until told to do so.  'continue' can be used to start or\n"
5644 	"\t    restart a paused hist trigger.\n\n"
5645 	"\t    The 'clear' parameter will clear the contents of a running\n"
5646 	"\t    hist trigger and leave its current paused/active state\n"
5647 	"\t    unchanged.\n\n"
5648 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5649 	"\t    raw hitcount in the histogram.\n\n"
5650 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5651 	"\t    have one event conditionally start and stop another event's\n"
5652 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5653 	"\t    the enable_event and disable_event triggers.\n\n"
5654 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5655 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5656 	"\t        <handler>.<action>\n\n"
5657 	"\t    The available handlers are:\n\n"
5658 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5659 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5660 	"\t        onchange(var)            - invoke action if var changes\n\n"
5661 	"\t    The available actions are:\n\n"
5662 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5663 	"\t        save(field,...)                      - save current event fields\n"
5664 #ifdef CONFIG_TRACER_SNAPSHOT
5665 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5666 #endif
5667 #ifdef CONFIG_SYNTH_EVENTS
5668 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5669 	"\t  Write into this file to define/undefine new synthetic events.\n"
5670 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5671 #endif
5672 #endif
5673 ;
5674 
5675 static ssize_t
tracing_readme_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5676 tracing_readme_read(struct file *filp, char __user *ubuf,
5677 		       size_t cnt, loff_t *ppos)
5678 {
5679 	return simple_read_from_buffer(ubuf, cnt, ppos,
5680 					readme_msg, strlen(readme_msg));
5681 }
5682 
5683 static const struct file_operations tracing_readme_fops = {
5684 	.open		= tracing_open_generic,
5685 	.read		= tracing_readme_read,
5686 	.llseek		= generic_file_llseek,
5687 };
5688 
5689 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5690 static union trace_eval_map_item *
update_eval_map(union trace_eval_map_item * ptr)5691 update_eval_map(union trace_eval_map_item *ptr)
5692 {
5693 	if (!ptr->map.eval_string) {
5694 		if (ptr->tail.next) {
5695 			ptr = ptr->tail.next;
5696 			/* Set ptr to the next real item (skip head) */
5697 			ptr++;
5698 		} else
5699 			return NULL;
5700 	}
5701 	return ptr;
5702 }
5703 
eval_map_next(struct seq_file * m,void * v,loff_t * pos)5704 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5705 {
5706 	union trace_eval_map_item *ptr = v;
5707 
5708 	/*
5709 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5710 	 * This really should never happen.
5711 	 */
5712 	(*pos)++;
5713 	ptr = update_eval_map(ptr);
5714 	if (WARN_ON_ONCE(!ptr))
5715 		return NULL;
5716 
5717 	ptr++;
5718 	ptr = update_eval_map(ptr);
5719 
5720 	return ptr;
5721 }
5722 
eval_map_start(struct seq_file * m,loff_t * pos)5723 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5724 {
5725 	union trace_eval_map_item *v;
5726 	loff_t l = 0;
5727 
5728 	mutex_lock(&trace_eval_mutex);
5729 
5730 	v = trace_eval_maps;
5731 	if (v)
5732 		v++;
5733 
5734 	while (v && l < *pos) {
5735 		v = eval_map_next(m, v, &l);
5736 	}
5737 
5738 	return v;
5739 }
5740 
eval_map_stop(struct seq_file * m,void * v)5741 static void eval_map_stop(struct seq_file *m, void *v)
5742 {
5743 	mutex_unlock(&trace_eval_mutex);
5744 }
5745 
eval_map_show(struct seq_file * m,void * v)5746 static int eval_map_show(struct seq_file *m, void *v)
5747 {
5748 	union trace_eval_map_item *ptr = v;
5749 
5750 	seq_printf(m, "%s %ld (%s)\n",
5751 		   ptr->map.eval_string, ptr->map.eval_value,
5752 		   ptr->map.system);
5753 
5754 	return 0;
5755 }
5756 
5757 static const struct seq_operations tracing_eval_map_seq_ops = {
5758 	.start		= eval_map_start,
5759 	.next		= eval_map_next,
5760 	.stop		= eval_map_stop,
5761 	.show		= eval_map_show,
5762 };
5763 
tracing_eval_map_open(struct inode * inode,struct file * filp)5764 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5765 {
5766 	int ret;
5767 
5768 	ret = tracing_check_open_get_tr(NULL);
5769 	if (ret)
5770 		return ret;
5771 
5772 	return seq_open(filp, &tracing_eval_map_seq_ops);
5773 }
5774 
5775 static const struct file_operations tracing_eval_map_fops = {
5776 	.open		= tracing_eval_map_open,
5777 	.read		= seq_read,
5778 	.llseek		= seq_lseek,
5779 	.release	= seq_release,
5780 };
5781 
5782 static inline union trace_eval_map_item *
trace_eval_jmp_to_tail(union trace_eval_map_item * ptr)5783 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5784 {
5785 	/* Return tail of array given the head */
5786 	return ptr + ptr->head.length + 1;
5787 }
5788 
5789 static void
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5790 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5791 			   int len)
5792 {
5793 	struct trace_eval_map **stop;
5794 	struct trace_eval_map **map;
5795 	union trace_eval_map_item *map_array;
5796 	union trace_eval_map_item *ptr;
5797 
5798 	stop = start + len;
5799 
5800 	/*
5801 	 * The trace_eval_maps contains the map plus a head and tail item,
5802 	 * where the head holds the module and length of array, and the
5803 	 * tail holds a pointer to the next list.
5804 	 */
5805 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5806 	if (!map_array) {
5807 		pr_warn("Unable to allocate trace eval mapping\n");
5808 		return;
5809 	}
5810 
5811 	mutex_lock(&trace_eval_mutex);
5812 
5813 	if (!trace_eval_maps)
5814 		trace_eval_maps = map_array;
5815 	else {
5816 		ptr = trace_eval_maps;
5817 		for (;;) {
5818 			ptr = trace_eval_jmp_to_tail(ptr);
5819 			if (!ptr->tail.next)
5820 				break;
5821 			ptr = ptr->tail.next;
5822 
5823 		}
5824 		ptr->tail.next = map_array;
5825 	}
5826 	map_array->head.mod = mod;
5827 	map_array->head.length = len;
5828 	map_array++;
5829 
5830 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5831 		map_array->map = **map;
5832 		map_array++;
5833 	}
5834 	memset(map_array, 0, sizeof(*map_array));
5835 
5836 	mutex_unlock(&trace_eval_mutex);
5837 }
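
/*
 * Layout of one block allocated above (a sketch of how the union is
 * used, derived from the code, not an additional data structure):
 *
 *	map_array[0]          head: { .mod, .length = len }
 *	map_array[1 .. len]   map:  copies of the module's trace_eval_map entries
 *	map_array[len + 1]    tail: zeroed; tail.next chains to the next block
 *
 * trace_eval_jmp_to_tail() relies on this: tail == head + head.length + 1.
 */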
5838 
trace_create_eval_file(struct dentry * d_tracer)5839 static void trace_create_eval_file(struct dentry *d_tracer)
5840 {
5841 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5842 			  NULL, &tracing_eval_map_fops);
5843 }
5844 
5845 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
trace_create_eval_file(struct dentry * d_tracer)5846 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
trace_insert_eval_map_file(struct module * mod,struct trace_eval_map ** start,int len)5847 static inline void trace_insert_eval_map_file(struct module *mod,
5848 			      struct trace_eval_map **start, int len) { }
5849 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5850 
trace_insert_eval_map(struct module * mod,struct trace_eval_map ** start,int len)5851 static void trace_insert_eval_map(struct module *mod,
5852 				  struct trace_eval_map **start, int len)
5853 {
5854 	struct trace_eval_map **map;
5855 
5856 	if (len <= 0)
5857 		return;
5858 
5859 	map = start;
5860 
5861 	trace_event_eval_update(map, len);
5862 
5863 	trace_insert_eval_map_file(mod, start, len);
5864 }
5865 
5866 static ssize_t
tracing_set_trace_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)5867 tracing_set_trace_read(struct file *filp, char __user *ubuf,
5868 		       size_t cnt, loff_t *ppos)
5869 {
5870 	struct trace_array *tr = filp->private_data;
5871 	char buf[MAX_TRACER_SIZE+2];
5872 	int r;
5873 
5874 	mutex_lock(&trace_types_lock);
5875 	r = sprintf(buf, "%s\n", tr->current_trace->name);
5876 	mutex_unlock(&trace_types_lock);
5877 
5878 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
5879 }
5880 
tracer_init(struct tracer * t,struct trace_array * tr)5881 int tracer_init(struct tracer *t, struct trace_array *tr)
5882 {
5883 	tracing_reset_online_cpus(&tr->array_buffer);
5884 	return t->init(tr);
5885 }
5886 
set_buffer_entries(struct array_buffer * buf,unsigned long val)5887 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
5888 {
5889 	int cpu;
5890 
5891 	for_each_tracing_cpu(cpu)
5892 		per_cpu_ptr(buf->data, cpu)->entries = val;
5893 }
5894 
update_buffer_entries(struct array_buffer * buf,int cpu)5895 static void update_buffer_entries(struct array_buffer *buf, int cpu)
5896 {
5897 	if (cpu == RING_BUFFER_ALL_CPUS) {
5898 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
5899 	} else {
5900 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
5901 	}
5902 }
5903 
5904 #ifdef CONFIG_TRACER_MAX_TRACE
5905 /* resize @trace_buf's buffer to the size of @size_buf's entries */
resize_buffer_duplicate_size(struct array_buffer * trace_buf,struct array_buffer * size_buf,int cpu_id)5906 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
5907 					struct array_buffer *size_buf, int cpu_id)
5908 {
5909 	int cpu, ret = 0;
5910 
5911 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
5912 		for_each_tracing_cpu(cpu) {
5913 			ret = ring_buffer_resize(trace_buf->buffer,
5914 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
5915 			if (ret < 0)
5916 				break;
5917 			per_cpu_ptr(trace_buf->data, cpu)->entries =
5918 				per_cpu_ptr(size_buf->data, cpu)->entries;
5919 		}
5920 	} else {
5921 		ret = ring_buffer_resize(trace_buf->buffer,
5922 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
5923 		if (ret == 0)
5924 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
5925 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
5926 	}
5927 
5928 	return ret;
5929 }
5930 #endif /* CONFIG_TRACER_MAX_TRACE */
5931 
__tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu)5932 static int __tracing_resize_ring_buffer(struct trace_array *tr,
5933 					unsigned long size, int cpu)
5934 {
5935 	int ret;
5936 
5937 	/*
5938 	 * If kernel or user changes the size of the ring buffer
5939 	 * we use the size that was given, and we can forget about
5940 	 * expanding it later.
5941 	 */
5942 	trace_set_ring_buffer_expanded(tr);
5943 
5944 	/* May be called before buffers are initialized */
5945 	if (!tr->array_buffer.buffer)
5946 		return 0;
5947 
5948 	/* Do not allow tracing while resizing ring buffer */
5949 	tracing_stop_tr(tr);
5950 
5951 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
5952 	if (ret < 0)
5953 		goto out_start;
5954 
5955 #ifdef CONFIG_TRACER_MAX_TRACE
5956 	if (!tr->allocated_snapshot)
5957 		goto out;
5958 
5959 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
5960 	if (ret < 0) {
5961 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
5962 						     &tr->array_buffer, cpu);
5963 		if (r < 0) {
5964 			/*
5965 			 * AARGH! We are left with a different
5966 			 * sized max buffer!
5967 			 * The max buffer is our "snapshot" buffer.
5968 			 * When a tracer needs a snapshot (one of the
5969 			 * latency tracers), it swaps the max buffer
5970 			 * with the saved snapshot. We succeeded in
5971 			 * updating the size of the main buffer, but failed to
5972 			 * update the size of the max buffer. But when we tried
5973 			 * to reset the main buffer to the original size, we
5974 			 * failed there too. This is very unlikely to
5975 			 * happen, but if it does, warn and kill all
5976 			 * tracing.
5977 			 */
5978 			WARN_ON(1);
5979 			tracing_disabled = 1;
5980 		}
5981 		goto out_start;
5982 	}
5983 
5984 	update_buffer_entries(&tr->max_buffer, cpu);
5985 
5986  out:
5987 #endif /* CONFIG_TRACER_MAX_TRACE */
5988 
5989 	update_buffer_entries(&tr->array_buffer, cpu);
5990  out_start:
5991 	tracing_start_tr(tr);
5992 	return ret;
5993 }
5994 
tracing_resize_ring_buffer(struct trace_array * tr,unsigned long size,int cpu_id)5995 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
5996 				  unsigned long size, int cpu_id)
5997 {
5998 	int ret;
5999 
6000 	mutex_lock(&trace_types_lock);
6001 
6002 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6003 		/* make sure, this cpu is enabled in the mask */
6004 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6005 			ret = -EINVAL;
6006 			goto out;
6007 		}
6008 	}
6009 
6010 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6011 	if (ret < 0)
6012 		ret = -ENOMEM;
6013 
6014 out:
6015 	mutex_unlock(&trace_types_lock);
6016 
6017 	return ret;
6018 }
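
/*
 * A minimal in-kernel sketch (hypothetical caller, for illustration):
 * @size is in bytes and @cpu_id is either a CPU number or
 * RING_BUFFER_ALL_CPUS.
 *
 *	ssize_t ret;
 *
 *	ret = tracing_resize_ring_buffer(tr, 1024 * 1024, RING_BUFFER_ALL_CPUS);
 *	if (ret < 0)
 *		pr_warn("resize failed: %zd\n", ret);
 *
 * User space reaches the same path through buffer_size_kb (see
 * tracing_entries_write() below), which takes the size in KiB per CPU.
 */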
6019 
update_last_data(struct trace_array * tr)6020 static void update_last_data(struct trace_array *tr)
6021 {
6022 	if (!tr->text_delta && !tr->data_delta)
6023 		return;
6024 
6025 	/*
6026 	 * Need to clear all CPU buffers as there cannot be events
6027 	 * from the previous boot mixed with events from this boot,
6028 	 * as that would cause a confusing trace. Clear all
6029 	 * CPU buffers, even for those that may currently be offline.
6030 	 */
6031 	tracing_reset_all_cpus(&tr->array_buffer);
6032 
6033 	/* Using current data now */
6034 	tr->text_delta = 0;
6035 	tr->data_delta = 0;
6036 }
6037 
6038 /**
6039  * tracing_update_buffers - used by tracing facility to expand ring buffers
6040  * @tr: The tracing instance
6041  *
6042  * To save memory when tracing is never used on a system that has it
6043  * configured in, the ring buffers are set to a minimum size. But once
6044  * a user starts to use the tracing facility, they need to grow
6045  * to their default size.
6046  *
6047  * This function is to be called when a tracer is about to be used.
6048  */
tracing_update_buffers(struct trace_array * tr)6049 int tracing_update_buffers(struct trace_array *tr)
6050 {
6051 	int ret = 0;
6052 
6053 	mutex_lock(&trace_types_lock);
6054 
6055 	update_last_data(tr);
6056 
6057 	if (!tr->ring_buffer_expanded)
6058 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6059 						RING_BUFFER_ALL_CPUS);
6060 	mutex_unlock(&trace_types_lock);
6061 
6062 	return ret;
6063 }
6064 
6065 struct trace_option_dentry;
6066 
6067 static void
6068 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6069 
6070 /*
6071  * Used to clear out the tracer before deletion of an instance.
6072  * Must have trace_types_lock held.
6073  */
tracing_set_nop(struct trace_array * tr)6074 static void tracing_set_nop(struct trace_array *tr)
6075 {
6076 	if (tr->current_trace == &nop_trace)
6077 		return;
6078 
6079 	tr->current_trace->enabled--;
6080 
6081 	if (tr->current_trace->reset)
6082 		tr->current_trace->reset(tr);
6083 
6084 	tr->current_trace = &nop_trace;
6085 }
6086 
6087 static bool tracer_options_updated;
6088 
add_tracer_options(struct trace_array * tr,struct tracer * t)6089 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6090 {
6091 	/* Only enable if the directory has been created already. */
6092 	if (!tr->dir)
6093 		return;
6094 
6095 	/* Only create trace option files after update_tracer_options finish */
6096 	if (!tracer_options_updated)
6097 		return;
6098 
6099 	create_trace_option_files(tr, t);
6100 }
6101 
tracing_set_tracer(struct trace_array * tr,const char * buf)6102 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6103 {
6104 	struct tracer *t;
6105 #ifdef CONFIG_TRACER_MAX_TRACE
6106 	bool had_max_tr;
6107 #endif
6108 	int ret = 0;
6109 
6110 	mutex_lock(&trace_types_lock);
6111 
6112 	update_last_data(tr);
6113 
6114 	if (!tr->ring_buffer_expanded) {
6115 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6116 						RING_BUFFER_ALL_CPUS);
6117 		if (ret < 0)
6118 			goto out;
6119 		ret = 0;
6120 	}
6121 
6122 	for (t = trace_types; t; t = t->next) {
6123 		if (strcmp(t->name, buf) == 0)
6124 			break;
6125 	}
6126 	if (!t) {
6127 		ret = -EINVAL;
6128 		goto out;
6129 	}
6130 	if (t == tr->current_trace)
6131 		goto out;
6132 
6133 #ifdef CONFIG_TRACER_SNAPSHOT
6134 	if (t->use_max_tr) {
6135 		local_irq_disable();
6136 		arch_spin_lock(&tr->max_lock);
6137 		if (tr->cond_snapshot)
6138 			ret = -EBUSY;
6139 		arch_spin_unlock(&tr->max_lock);
6140 		local_irq_enable();
6141 		if (ret)
6142 			goto out;
6143 	}
6144 #endif
6145 	/* Some tracers won't work on kernel command line */
6146 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6147 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6148 			t->name);
6149 		goto out;
6150 	}
6151 
6152 	/* Some tracers are only allowed for the top level buffer */
6153 	if (!trace_ok_for_array(t, tr)) {
6154 		ret = -EINVAL;
6155 		goto out;
6156 	}
6157 
6158 	/* If trace pipe files are being read, we can't change the tracer */
6159 	if (tr->trace_ref) {
6160 		ret = -EBUSY;
6161 		goto out;
6162 	}
6163 
6164 	trace_branch_disable();
6165 
6166 	tr->current_trace->enabled--;
6167 
6168 	if (tr->current_trace->reset)
6169 		tr->current_trace->reset(tr);
6170 
6171 #ifdef CONFIG_TRACER_MAX_TRACE
6172 	had_max_tr = tr->current_trace->use_max_tr;
6173 
6174 	/* Current trace needs to be nop_trace before synchronize_rcu */
6175 	tr->current_trace = &nop_trace;
6176 
6177 	if (had_max_tr && !t->use_max_tr) {
6178 		/*
6179 		 * We need to make sure that the update_max_tr sees that
6180 		 * current_trace changed to nop_trace to keep it from
6181 		 * swapping the buffers after we resize it.
6182 		 * The update_max_tr is called from interrupts disabled
6183 		 * The update_max_tr is called with interrupts disabled,
6184 		 * so a synchronize_rcu() is sufficient.
6185 		synchronize_rcu();
6186 		free_snapshot(tr);
6187 		tracing_disarm_snapshot(tr);
6188 	}
6189 
6190 	if (!had_max_tr && t->use_max_tr) {
6191 		ret = tracing_arm_snapshot_locked(tr);
6192 		if (ret)
6193 			goto out;
6194 	}
6195 #else
6196 	tr->current_trace = &nop_trace;
6197 #endif
6198 
6199 	if (t->init) {
6200 		ret = tracer_init(t, tr);
6201 		if (ret) {
6202 #ifdef CONFIG_TRACER_MAX_TRACE
6203 			if (t->use_max_tr)
6204 				tracing_disarm_snapshot(tr);
6205 #endif
6206 			goto out;
6207 		}
6208 	}
6209 
6210 	tr->current_trace = t;
6211 	tr->current_trace->enabled++;
6212 	trace_branch_enable(tr);
6213  out:
6214 	mutex_unlock(&trace_types_lock);
6215 
6216 	return ret;
6217 }
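
/*
 * User-space sketch (tracefs assumed at /sys/kernel/tracing):
 *
 *	# cat available_tracers
 *	# echo function_graph > current_tracer
 *	# echo nop > current_tracer
 *
 * A name not listed in available_tracers fails with -EINVAL, and the
 * tracer cannot be changed while trace_pipe readers hold a reference
 * (-EBUSY).
 */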
6218 
6219 static ssize_t
tracing_set_trace_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6220 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6221 			size_t cnt, loff_t *ppos)
6222 {
6223 	struct trace_array *tr = filp->private_data;
6224 	char buf[MAX_TRACER_SIZE+1];
6225 	char *name;
6226 	size_t ret;
6227 	int err;
6228 
6229 	ret = cnt;
6230 
6231 	if (cnt > MAX_TRACER_SIZE)
6232 		cnt = MAX_TRACER_SIZE;
6233 
6234 	if (copy_from_user(buf, ubuf, cnt))
6235 		return -EFAULT;
6236 
6237 	buf[cnt] = 0;
6238 
6239 	name = strim(buf);
6240 
6241 	err = tracing_set_tracer(tr, name);
6242 	if (err)
6243 		return err;
6244 
6245 	*ppos += ret;
6246 
6247 	return ret;
6248 }
6249 
6250 static ssize_t
tracing_nsecs_read(unsigned long * ptr,char __user * ubuf,size_t cnt,loff_t * ppos)6251 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6252 		   size_t cnt, loff_t *ppos)
6253 {
6254 	char buf[64];
6255 	int r;
6256 
6257 	r = snprintf(buf, sizeof(buf), "%ld\n",
6258 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6259 	if (r > sizeof(buf))
6260 		r = sizeof(buf);
6261 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6262 }
6263 
6264 static ssize_t
tracing_nsecs_write(unsigned long * ptr,const char __user * ubuf,size_t cnt,loff_t * ppos)6265 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6266 		    size_t cnt, loff_t *ppos)
6267 {
6268 	unsigned long val;
6269 	int ret;
6270 
6271 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6272 	if (ret)
6273 		return ret;
6274 
6275 	*ptr = val * 1000;
6276 
6277 	return cnt;
6278 }
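
/*
 * These two helpers expose latency values in microseconds while storing
 * them in nanoseconds. Sketch (tracefs assumed at /sys/kernel/tracing):
 *
 *	# echo 100 > tracing_thresh	<-- stores 100000 ns
 *	# cat tracing_thresh		<-- prints 100
 */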
6279 
6280 static ssize_t
tracing_thresh_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6281 tracing_thresh_read(struct file *filp, char __user *ubuf,
6282 		    size_t cnt, loff_t *ppos)
6283 {
6284 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6285 }
6286 
6287 static ssize_t
tracing_thresh_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6288 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6289 		     size_t cnt, loff_t *ppos)
6290 {
6291 	struct trace_array *tr = filp->private_data;
6292 	int ret;
6293 
6294 	mutex_lock(&trace_types_lock);
6295 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6296 	if (ret < 0)
6297 		goto out;
6298 
6299 	if (tr->current_trace->update_thresh) {
6300 		ret = tr->current_trace->update_thresh(tr);
6301 		if (ret < 0)
6302 			goto out;
6303 	}
6304 
6305 	ret = cnt;
6306 out:
6307 	mutex_unlock(&trace_types_lock);
6308 
6309 	return ret;
6310 }
6311 
6312 #ifdef CONFIG_TRACER_MAX_TRACE
6313 
6314 static ssize_t
tracing_max_lat_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6315 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6316 		     size_t cnt, loff_t *ppos)
6317 {
6318 	struct trace_array *tr = filp->private_data;
6319 
6320 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6321 }
6322 
6323 static ssize_t
tracing_max_lat_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6324 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6325 		      size_t cnt, loff_t *ppos)
6326 {
6327 	struct trace_array *tr = filp->private_data;
6328 
6329 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6330 }
6331 
6332 #endif
6333 
open_pipe_on_cpu(struct trace_array * tr,int cpu)6334 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6335 {
6336 	if (cpu == RING_BUFFER_ALL_CPUS) {
6337 		if (cpumask_empty(tr->pipe_cpumask)) {
6338 			cpumask_setall(tr->pipe_cpumask);
6339 			return 0;
6340 		}
6341 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6342 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6343 		return 0;
6344 	}
6345 	return -EBUSY;
6346 }
6347 
close_pipe_on_cpu(struct trace_array * tr,int cpu)6348 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6349 {
6350 	if (cpu == RING_BUFFER_ALL_CPUS) {
6351 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6352 		cpumask_clear(tr->pipe_cpumask);
6353 	} else {
6354 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6355 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6356 	}
6357 }
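
/*
 * The pipe_cpumask bookkeeping above allows at most one trace_pipe
 * consumer per CPU, and an "all CPUs" consumer only when no per-CPU
 * consumer exists. For example (a sketch, tracefs assumed at
 * /sys/kernel/tracing), a second concurrent reader is refused:
 *
 *	# cat trace_pipe &
 *	# cat trace_pipe
 *	cat: trace_pipe: Device or resource busy
 */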
6358 
tracing_open_pipe(struct inode * inode,struct file * filp)6359 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6360 {
6361 	struct trace_array *tr = inode->i_private;
6362 	struct trace_iterator *iter;
6363 	int cpu;
6364 	int ret;
6365 
6366 	ret = tracing_check_open_get_tr(tr);
6367 	if (ret)
6368 		return ret;
6369 
6370 	mutex_lock(&trace_types_lock);
6371 	cpu = tracing_get_cpu(inode);
6372 	ret = open_pipe_on_cpu(tr, cpu);
6373 	if (ret)
6374 		goto fail_pipe_on_cpu;
6375 
6376 	/* create a buffer to store the information to pass to userspace */
6377 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6378 	if (!iter) {
6379 		ret = -ENOMEM;
6380 		goto fail_alloc_iter;
6381 	}
6382 
6383 	trace_seq_init(&iter->seq);
6384 	iter->trace = tr->current_trace;
6385 
6386 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6387 		ret = -ENOMEM;
6388 		goto fail;
6389 	}
6390 
6391 	/* trace pipe does not show start of buffer */
6392 	cpumask_setall(iter->started);
6393 
6394 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6395 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6396 
6397 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6398 	if (trace_clocks[tr->clock_id].in_ns)
6399 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6400 
6401 	iter->tr = tr;
6402 	iter->array_buffer = &tr->array_buffer;
6403 	iter->cpu_file = cpu;
6404 	mutex_init(&iter->mutex);
6405 	filp->private_data = iter;
6406 
6407 	if (iter->trace->pipe_open)
6408 		iter->trace->pipe_open(iter);
6409 
6410 	nonseekable_open(inode, filp);
6411 
6412 	tr->trace_ref++;
6413 
6414 	mutex_unlock(&trace_types_lock);
6415 	return ret;
6416 
6417 fail:
6418 	kfree(iter);
6419 fail_alloc_iter:
6420 	close_pipe_on_cpu(tr, cpu);
6421 fail_pipe_on_cpu:
6422 	__trace_array_put(tr);
6423 	mutex_unlock(&trace_types_lock);
6424 	return ret;
6425 }
6426 
tracing_release_pipe(struct inode * inode,struct file * file)6427 static int tracing_release_pipe(struct inode *inode, struct file *file)
6428 {
6429 	struct trace_iterator *iter = file->private_data;
6430 	struct trace_array *tr = inode->i_private;
6431 
6432 	mutex_lock(&trace_types_lock);
6433 
6434 	tr->trace_ref--;
6435 
6436 	if (iter->trace->pipe_close)
6437 		iter->trace->pipe_close(iter);
6438 	close_pipe_on_cpu(tr, iter->cpu_file);
6439 	mutex_unlock(&trace_types_lock);
6440 
6441 	free_trace_iter_content(iter);
6442 	kfree(iter);
6443 
6444 	trace_array_put(tr);
6445 
6446 	return 0;
6447 }
6448 
6449 static __poll_t
trace_poll(struct trace_iterator * iter,struct file * filp,poll_table * poll_table)6450 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6451 {
6452 	struct trace_array *tr = iter->tr;
6453 
6454 	/* Iterators are static, they should be filled or empty */
6455 	if (trace_buffer_iter(iter, iter->cpu_file))
6456 		return EPOLLIN | EPOLLRDNORM;
6457 
6458 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6459 		/*
6460 		 * Always select as readable when in blocking mode
6461 		 */
6462 		return EPOLLIN | EPOLLRDNORM;
6463 	else
6464 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6465 					     filp, poll_table, iter->tr->buffer_percent);
6466 }
6467 
6468 static __poll_t
tracing_poll_pipe(struct file * filp,poll_table * poll_table)6469 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6470 {
6471 	struct trace_iterator *iter = filp->private_data;
6472 
6473 	return trace_poll(iter, filp, poll_table);
6474 }
6475 
6476 /* Must be called with iter->mutex held. */
tracing_wait_pipe(struct file * filp)6477 static int tracing_wait_pipe(struct file *filp)
6478 {
6479 	struct trace_iterator *iter = filp->private_data;
6480 	int ret;
6481 
6482 	while (trace_empty(iter)) {
6483 
6484 		if ((filp->f_flags & O_NONBLOCK)) {
6485 			return -EAGAIN;
6486 		}
6487 
6488 		/*
6489 		 * We block until we read something and tracing is disabled.
6490 		 * We still block if tracing is disabled, but we have never
6491 		 * read anything. This allows a user to cat this file, and
6492 		 * then enable tracing. But after we have read something,
6493 		 * we give an EOF when tracing is again disabled.
6494 		 *
6495 		 * iter->pos will be 0 if we haven't read anything.
6496 		 */
6497 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6498 			break;
6499 
6500 		mutex_unlock(&iter->mutex);
6501 
6502 		ret = wait_on_pipe(iter, 0);
6503 
6504 		mutex_lock(&iter->mutex);
6505 
6506 		if (ret)
6507 			return ret;
6508 	}
6509 
6510 	return 1;
6511 }
6512 
6513 /*
6514  * Consumer reader.
6515  */
6516 static ssize_t
tracing_read_pipe(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6517 tracing_read_pipe(struct file *filp, char __user *ubuf,
6518 		  size_t cnt, loff_t *ppos)
6519 {
6520 	struct trace_iterator *iter = filp->private_data;
6521 	ssize_t sret;
6522 
6523 	/*
6524 	 * Avoid more than one consumer on a single file descriptor
6525 	 * This is just a matter of traces coherency, the ring buffer itself
6526 	 * is protected.
6527 	 */
6528 	mutex_lock(&iter->mutex);
6529 
6530 	/* return any leftover data */
6531 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6532 	if (sret != -EBUSY)
6533 		goto out;
6534 
6535 	trace_seq_init(&iter->seq);
6536 
6537 	if (iter->trace->read) {
6538 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6539 		if (sret)
6540 			goto out;
6541 	}
6542 
6543 waitagain:
6544 	sret = tracing_wait_pipe(filp);
6545 	if (sret <= 0)
6546 		goto out;
6547 
6548 	/* stop when tracing is finished */
6549 	if (trace_empty(iter)) {
6550 		sret = 0;
6551 		goto out;
6552 	}
6553 
6554 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6555 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6556 
6557 	/* reset all but tr, trace, and overruns */
6558 	trace_iterator_reset(iter);
6559 	cpumask_clear(iter->started);
6560 	trace_seq_init(&iter->seq);
6561 
6562 	trace_event_read_lock();
6563 	trace_access_lock(iter->cpu_file);
6564 	while (trace_find_next_entry_inc(iter) != NULL) {
6565 		enum print_line_t ret;
6566 		int save_len = iter->seq.seq.len;
6567 
6568 		ret = print_trace_line(iter);
6569 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6570 			/*
6571 			 * If one print_trace_line() fills the entire trace_seq in one shot,
6572 			 * trace_seq_to_user() will return -EBUSY because save_len == 0.
6573 			 * In this case, we need to consume it, otherwise the loop will pick up
6574 			 * this event again next time, resulting in an infinite loop.
6575 			 */
6576 			if (save_len == 0) {
6577 				iter->seq.full = 0;
6578 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6579 				trace_consume(iter);
6580 				break;
6581 			}
6582 
6583 			/* In other cases, don't print partial lines */
6584 			iter->seq.seq.len = save_len;
6585 			break;
6586 		}
6587 		if (ret != TRACE_TYPE_NO_CONSUME)
6588 			trace_consume(iter);
6589 
6590 		if (trace_seq_used(&iter->seq) >= cnt)
6591 			break;
6592 
6593 		/*
6594 		 * Setting the full flag means we reached the trace_seq buffer
6595 		 * size and we should have left via the partial output condition above.
6596 		 * One of the trace_seq_* functions is not used properly.
6597 		 */
6598 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6599 			  iter->ent->type);
6600 	}
6601 	trace_access_unlock(iter->cpu_file);
6602 	trace_event_read_unlock();
6603 
6604 	/* Now copy what we have to the user */
6605 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6606 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6607 		trace_seq_init(&iter->seq);
6608 
6609 	/*
6610 	 * If there was nothing to send to user, in spite of consuming trace
6611 	 * entries, go back to wait for more entries.
6612 	 */
6613 	if (sret == -EBUSY)
6614 		goto waitagain;
6615 
6616 out:
6617 	mutex_unlock(&iter->mutex);
6618 
6619 	return sret;
6620 }
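
/*
 * In short, trace_pipe is a consuming, blocking read. Sketch (tracefs
 * assumed at /sys/kernel/tracing):
 *
 *	# cat trace_pipe
 *
 * blocks until events arrive, removes each event it prints from the
 * ring buffer, and only returns EOF once something has been read and
 * tracing is turned off. Opening with O_NONBLOCK returns -EAGAIN
 * instead of blocking.
 */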
6621 
tracing_spd_release_pipe(struct splice_pipe_desc * spd,unsigned int idx)6622 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6623 				     unsigned int idx)
6624 {
6625 	__free_page(spd->pages[idx]);
6626 }
6627 
6628 static size_t
tracing_fill_pipe_page(size_t rem,struct trace_iterator * iter)6629 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6630 {
6631 	size_t count;
6632 	int save_len;
6633 	int ret;
6634 
6635 	/* Seq buffer is page-sized, exactly what we need. */
6636 	for (;;) {
6637 		save_len = iter->seq.seq.len;
6638 		ret = print_trace_line(iter);
6639 
6640 		if (trace_seq_has_overflowed(&iter->seq)) {
6641 			iter->seq.seq.len = save_len;
6642 			break;
6643 		}
6644 
6645 		/*
6646 		 * This should not be hit, because it should only
6647 		 * be set if the iter->seq overflowed. But check it
6648 		 * anyway to be safe.
6649 		 */
6650 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6651 			iter->seq.seq.len = save_len;
6652 			break;
6653 		}
6654 
6655 		count = trace_seq_used(&iter->seq) - save_len;
6656 		if (rem < count) {
6657 			rem = 0;
6658 			iter->seq.seq.len = save_len;
6659 			break;
6660 		}
6661 
6662 		if (ret != TRACE_TYPE_NO_CONSUME)
6663 			trace_consume(iter);
6664 		rem -= count;
6665 		if (!trace_find_next_entry_inc(iter))	{
6666 			rem = 0;
6667 			iter->ent = NULL;
6668 			break;
6669 		}
6670 	}
6671 
6672 	return rem;
6673 }
6674 
tracing_splice_read_pipe(struct file * filp,loff_t * ppos,struct pipe_inode_info * pipe,size_t len,unsigned int flags)6675 static ssize_t tracing_splice_read_pipe(struct file *filp,
6676 					loff_t *ppos,
6677 					struct pipe_inode_info *pipe,
6678 					size_t len,
6679 					unsigned int flags)
6680 {
6681 	struct page *pages_def[PIPE_DEF_BUFFERS];
6682 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6683 	struct trace_iterator *iter = filp->private_data;
6684 	struct splice_pipe_desc spd = {
6685 		.pages		= pages_def,
6686 		.partial	= partial_def,
6687 		.nr_pages	= 0, /* This gets updated below. */
6688 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6689 		.ops		= &default_pipe_buf_ops,
6690 		.spd_release	= tracing_spd_release_pipe,
6691 	};
6692 	ssize_t ret;
6693 	size_t rem;
6694 	unsigned int i;
6695 
6696 	if (splice_grow_spd(pipe, &spd))
6697 		return -ENOMEM;
6698 
6699 	mutex_lock(&iter->mutex);
6700 
6701 	if (iter->trace->splice_read) {
6702 		ret = iter->trace->splice_read(iter, filp,
6703 					       ppos, pipe, len, flags);
6704 		if (ret)
6705 			goto out_err;
6706 	}
6707 
6708 	ret = tracing_wait_pipe(filp);
6709 	if (ret <= 0)
6710 		goto out_err;
6711 
6712 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6713 		ret = -EFAULT;
6714 		goto out_err;
6715 	}
6716 
6717 	trace_event_read_lock();
6718 	trace_access_lock(iter->cpu_file);
6719 
6720 	/* Fill as many pages as possible. */
6721 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6722 		spd.pages[i] = alloc_page(GFP_KERNEL);
6723 		if (!spd.pages[i])
6724 			break;
6725 
6726 		rem = tracing_fill_pipe_page(rem, iter);
6727 
6728 		/* Copy the data into the page, so we can start over. */
6729 		ret = trace_seq_to_buffer(&iter->seq,
6730 					  page_address(spd.pages[i]),
6731 					  trace_seq_used(&iter->seq));
6732 		if (ret < 0) {
6733 			__free_page(spd.pages[i]);
6734 			break;
6735 		}
6736 		spd.partial[i].offset = 0;
6737 		spd.partial[i].len = trace_seq_used(&iter->seq);
6738 
6739 		trace_seq_init(&iter->seq);
6740 	}
6741 
6742 	trace_access_unlock(iter->cpu_file);
6743 	trace_event_read_unlock();
6744 	mutex_unlock(&iter->mutex);
6745 
6746 	spd.nr_pages = i;
6747 
6748 	if (i)
6749 		ret = splice_to_pipe(pipe, &spd);
6750 	else
6751 		ret = 0;
6752 out:
6753 	splice_shrink_spd(&spd);
6754 	return ret;
6755 
6756 out_err:
6757 	mutex_unlock(&iter->mutex);
6758 	goto out;
6759 }
6760 
6761 static ssize_t
tracing_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6762 tracing_entries_read(struct file *filp, char __user *ubuf,
6763 		     size_t cnt, loff_t *ppos)
6764 {
6765 	struct inode *inode = file_inode(filp);
6766 	struct trace_array *tr = inode->i_private;
6767 	int cpu = tracing_get_cpu(inode);
6768 	char buf[64];
6769 	int r = 0;
6770 	ssize_t ret;
6771 
6772 	mutex_lock(&trace_types_lock);
6773 
6774 	if (cpu == RING_BUFFER_ALL_CPUS) {
6775 		int cpu, buf_size_same;
6776 		unsigned long size;
6777 
6778 		size = 0;
6779 		buf_size_same = 1;
6780 		/* check if all cpu sizes are same */
6781 		for_each_tracing_cpu(cpu) {
6782 			/* fill in the size from first enabled cpu */
6783 			if (size == 0)
6784 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6785 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6786 				buf_size_same = 0;
6787 				break;
6788 			}
6789 		}
6790 
6791 		if (buf_size_same) {
6792 			if (!tr->ring_buffer_expanded)
6793 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6794 					    size >> 10,
6795 					    trace_buf_size >> 10);
6796 			else
6797 				r = sprintf(buf, "%lu\n", size >> 10);
6798 		} else
6799 			r = sprintf(buf, "X\n");
6800 	} else
6801 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6802 
6803 	mutex_unlock(&trace_types_lock);
6804 
6805 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6806 	return ret;
6807 }
6808 
6809 static ssize_t
tracing_entries_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)6810 tracing_entries_write(struct file *filp, const char __user *ubuf,
6811 		      size_t cnt, loff_t *ppos)
6812 {
6813 	struct inode *inode = file_inode(filp);
6814 	struct trace_array *tr = inode->i_private;
6815 	unsigned long val;
6816 	int ret;
6817 
6818 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6819 	if (ret)
6820 		return ret;
6821 
6822 	/* must have at least 1 entry */
6823 	if (!val)
6824 		return -EINVAL;
6825 
6826 	/* value is in KB */
6827 	val <<= 10;
6828 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6829 	if (ret < 0)
6830 		return ret;
6831 
6832 	*ppos += cnt;
6833 
6834 	return cnt;
6835 }
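
/*
 * buffer_size_kb sketch (tracefs assumed at /sys/kernel/tracing): the
 * value is per CPU and in KiB. The top-level file resizes every CPU,
 * the per_cpu variant resizes a single one:
 *
 *	# echo 4096 > buffer_size_kb
 *	# echo 1024 > per_cpu/cpu0/buffer_size_kb
 *	# cat buffer_size_kb
 *	X
 *
 * "X" is what tracing_entries_read() above prints once the per-CPU
 * sizes no longer match.
 */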
6836 
6837 static ssize_t
tracing_total_entries_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6838 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6839 				size_t cnt, loff_t *ppos)
6840 {
6841 	struct trace_array *tr = filp->private_data;
6842 	char buf[64];
6843 	int r, cpu;
6844 	unsigned long size = 0, expanded_size = 0;
6845 
6846 	mutex_lock(&trace_types_lock);
6847 	for_each_tracing_cpu(cpu) {
6848 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6849 		if (!tr->ring_buffer_expanded)
6850 			expanded_size += trace_buf_size >> 10;
6851 	}
6852 	if (tr->ring_buffer_expanded)
6853 		r = sprintf(buf, "%lu\n", size);
6854 	else
6855 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6856 	mutex_unlock(&trace_types_lock);
6857 
6858 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6859 }
6860 
6861 static ssize_t
tracing_last_boot_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)6862 tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
6863 {
6864 	struct trace_array *tr = filp->private_data;
6865 	struct seq_buf seq;
6866 	char buf[64];
6867 
6868 	seq_buf_init(&seq, buf, 64);
6869 
6870 	seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta);
6871 	seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta);
6872 
6873 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq));
6874 }
6875 
6876 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
6877 {
6878 	struct trace_array *tr = inode->i_private;
6879 	int cpu = tracing_get_cpu(inode);
6880 	int ret;
6881 
6882 	ret = tracing_check_open_get_tr(tr);
6883 	if (ret)
6884 		return ret;
6885 
6886 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
6887 	if (ret < 0)
6888 		__trace_array_put(tr);
6889 	return ret;
6890 }
6891 
6892 static ssize_t
6893 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
6894 			  size_t cnt, loff_t *ppos)
6895 {
6896 	/*
6897 	 * There is no need to read what the user has written; this function
6898 	 * exists only so that using "echo" on this file does not return an error.
6899 	 */
6900 
6901 	*ppos += cnt;
6902 
6903 	return cnt;
6904 }
6905 
6906 static int
6907 tracing_free_buffer_release(struct inode *inode, struct file *filp)
6908 {
6909 	struct trace_array *tr = inode->i_private;
6910 
6911 	/* Disable tracing? */
6912 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
6913 		tracer_tracing_off(tr);
6914 	/* resize the ring buffer to 0 */
6915 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
6916 
6917 	trace_array_put(tr);
6918 
6919 	return 0;
6920 }
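
/*
 * Sketch of how the free_buffer file behaves, based on the two handlers
 * above: any write is accepted and ignored, and closing the file resizes
 * every per-CPU ring buffer to 0, freeing its pages; if the
 * TRACE_ITER_STOP_ON_FREE option is set, tracing is turned off first.
 * Assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   echo 1 > /sys/kernel/tracing/free_buffer
 */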
6921 
6922 #define TRACE_MARKER_MAX_SIZE		4096
6923 
6924 static ssize_t
6925 tracing_mark_write(struct file *filp, const char __user *ubuf,
6926 					size_t cnt, loff_t *fpos)
6927 {
6928 	struct trace_array *tr = filp->private_data;
6929 	struct ring_buffer_event *event;
6930 	enum event_trigger_type tt = ETT_NONE;
6931 	struct trace_buffer *buffer;
6932 	struct print_entry *entry;
6933 	int meta_size;
6934 	ssize_t written;
6935 	size_t size;
6936 	int len;
6937 
6938 /* Used in tracing_mark_raw_write() as well */
6939 #define FAULTED_STR "<faulted>"
6940 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
6941 
6942 	if (tracing_disabled)
6943 		return -EINVAL;
6944 
6945 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
6946 		return -EINVAL;
6947 
6948 	if ((ssize_t)cnt < 0)
6949 		return -EINVAL;
6950 
6951 	if (cnt > TRACE_MARKER_MAX_SIZE)
6952 		cnt = TRACE_MARKER_MAX_SIZE;
6953 
6954 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
6955  again:
6956 	size = cnt + meta_size;
6957 
6958 	/* If less than "<faulted>", then make sure we can still add that */
6959 	if (cnt < FAULTED_SIZE)
6960 		size += FAULTED_SIZE - cnt;
6961 
6962 	buffer = tr->array_buffer.buffer;
6963 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
6964 					    tracing_gen_ctx());
6965 	if (unlikely(!event)) {
6966 		/*
6967 		 * If the size was greater than what was allowed, then
6968 		 * make it smaller and try again.
6969 		 */
6970 		if (size > ring_buffer_max_event_size(buffer)) {
6971 			/* With cnt < FAULTED_SIZE, size should never exceed the max */
6972 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
6973 				return -EBADF;
6974 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
6975 			/* The above should only happen once */
6976 			if (WARN_ON_ONCE(cnt + meta_size == size))
6977 				return -EBADF;
6978 			goto again;
6979 		}
6980 
6981 		/* Ring buffer disabled, return as if not open for write */
6982 		return -EBADF;
6983 	}
6984 
6985 	entry = ring_buffer_event_data(event);
6986 	entry->ip = _THIS_IP_;
6987 
6988 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
6989 	if (len) {
6990 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
6991 		cnt = FAULTED_SIZE;
6992 		written = -EFAULT;
6993 	} else
6994 		written = cnt;
6995 
6996 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
6997 		/* do not add \n before testing triggers, but add \0 */
6998 		entry->buf[cnt] = '\0';
6999 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7000 	}
7001 
7002 	if (entry->buf[cnt - 1] != '\n') {
7003 		entry->buf[cnt] = '\n';
7004 		entry->buf[cnt + 1] = '\0';
7005 	} else
7006 		entry->buf[cnt] = '\0';
7007 
7008 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7009 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7010 	__buffer_unlock_commit(buffer, event);
7011 
7012 	if (tt)
7013 		event_triggers_post_call(tr->trace_marker_file, tt);
7014 
7015 	return written;
7016 }
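
/*
 * Usage sketch for tracing_mark_write(): a string written to the
 * trace_marker file is recorded as a TRACE_PRINT event (truncated to
 * TRACE_MARKER_MAX_SIZE bytes) and shows up inline in the trace output.
 * Assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   echo "hello from user space" > /sys/kernel/tracing/trace_marker
 */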
7017 
7018 static ssize_t
7019 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7020 					size_t cnt, loff_t *fpos)
7021 {
7022 	struct trace_array *tr = filp->private_data;
7023 	struct ring_buffer_event *event;
7024 	struct trace_buffer *buffer;
7025 	struct raw_data_entry *entry;
7026 	ssize_t written;
7027 	int size;
7028 	int len;
7029 
7030 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7031 
7032 	if (tracing_disabled)
7033 		return -EINVAL;
7034 
7035 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7036 		return -EINVAL;
7037 
7038 	/* The marker must at least have a tag id */
7039 	if (cnt < sizeof(unsigned int))
7040 		return -EINVAL;
7041 
7042 	size = sizeof(*entry) + cnt;
7043 	if (cnt < FAULT_SIZE_ID)
7044 		size += FAULT_SIZE_ID - cnt;
7045 
7046 	buffer = tr->array_buffer.buffer;
7047 
7048 	if (size > ring_buffer_max_event_size(buffer))
7049 		return -EINVAL;
7050 
7051 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7052 					    tracing_gen_ctx());
7053 	if (!event)
7054 		/* Ring buffer disabled, return as if not open for write */
7055 		return -EBADF;
7056 
7057 	entry = ring_buffer_event_data(event);
7058 
7059 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7060 	if (len) {
7061 		entry->id = -1;
7062 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7063 		written = -EFAULT;
7064 	} else
7065 		written = cnt;
7066 
7067 	__buffer_unlock_commit(buffer, event);
7068 
7069 	return written;
7070 }
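
/*
 * Rough user-space sketch for tracing_mark_raw_write(): the payload
 * written to trace_marker_raw must start with a 4-byte id followed by
 * arbitrary binary data.  The struct layout and id value below are made
 * up for illustration:
 *
 *   struct { unsigned int id; char data[8]; } rec = {
 *           .id = 42, .data = "rawdata",
 *   };
 *   write(fd, &rec, sizeof(rec));   // fd is an open trace_marker_raw
 */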
7071 
7072 static int tracing_clock_show(struct seq_file *m, void *v)
7073 {
7074 	struct trace_array *tr = m->private;
7075 	int i;
7076 
7077 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7078 		seq_printf(m,
7079 			"%s%s%s%s", i ? " " : "",
7080 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7081 			i == tr->clock_id ? "]" : "");
7082 	seq_putc(m, '\n');
7083 
7084 	return 0;
7085 }
7086 
7087 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7088 {
7089 	int i;
7090 
7091 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7092 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7093 			break;
7094 	}
7095 	if (i == ARRAY_SIZE(trace_clocks))
7096 		return -EINVAL;
7097 
7098 	mutex_lock(&trace_types_lock);
7099 
7100 	tr->clock_id = i;
7101 
7102 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7103 
7104 	/*
7105 	 * New clock may not be consistent with the previous clock.
7106 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7107 	 */
7108 	tracing_reset_online_cpus(&tr->array_buffer);
7109 
7110 #ifdef CONFIG_TRACER_MAX_TRACE
7111 	if (tr->max_buffer.buffer)
7112 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7113 	tracing_reset_online_cpus(&tr->max_buffer);
7114 #endif
7115 
7116 	mutex_unlock(&trace_types_lock);
7117 
7118 	return 0;
7119 }
7120 
7121 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7122 				   size_t cnt, loff_t *fpos)
7123 {
7124 	struct seq_file *m = filp->private_data;
7125 	struct trace_array *tr = m->private;
7126 	char buf[64];
7127 	const char *clockstr;
7128 	int ret;
7129 
7130 	if (cnt >= sizeof(buf))
7131 		return -EINVAL;
7132 
7133 	if (copy_from_user(buf, ubuf, cnt))
7134 		return -EFAULT;
7135 
7136 	buf[cnt] = 0;
7137 
7138 	clockstr = strstrip(buf);
7139 
7140 	ret = tracing_set_clock(tr, clockstr);
7141 	if (ret)
7142 		return ret;
7143 
7144 	*fpos += cnt;
7145 
7146 	return cnt;
7147 }
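
/*
 * Usage sketch for the trace_clock file served by the two handlers
 * above: reading lists the available clocks with the current one in
 * brackets, and writing one of the listed names switches to it and
 * resets the buffers (timestamps from different clocks are not
 * comparable).  Assuming tracefs is mounted at /sys/kernel/tracing:
 *
 *   cat /sys/kernel/tracing/trace_clock
 *   echo global > /sys/kernel/tracing/trace_clock
 */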
7148 
7149 static int tracing_clock_open(struct inode *inode, struct file *file)
7150 {
7151 	struct trace_array *tr = inode->i_private;
7152 	int ret;
7153 
7154 	ret = tracing_check_open_get_tr(tr);
7155 	if (ret)
7156 		return ret;
7157 
7158 	ret = single_open(file, tracing_clock_show, inode->i_private);
7159 	if (ret < 0)
7160 		trace_array_put(tr);
7161 
7162 	return ret;
7163 }
7164 
7165 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7166 {
7167 	struct trace_array *tr = m->private;
7168 
7169 	mutex_lock(&trace_types_lock);
7170 
7171 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7172 		seq_puts(m, "delta [absolute]\n");
7173 	else
7174 		seq_puts(m, "[delta] absolute\n");
7175 
7176 	mutex_unlock(&trace_types_lock);
7177 
7178 	return 0;
7179 }
7180 
7181 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7182 {
7183 	struct trace_array *tr = inode->i_private;
7184 	int ret;
7185 
7186 	ret = tracing_check_open_get_tr(tr);
7187 	if (ret)
7188 		return ret;
7189 
7190 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7191 	if (ret < 0)
7192 		trace_array_put(tr);
7193 
7194 	return ret;
7195 }
7196 
7197 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7198 {
7199 	if (rbe == this_cpu_read(trace_buffered_event))
7200 		return ring_buffer_time_stamp(buffer);
7201 
7202 	return ring_buffer_event_time_stamp(buffer, rbe);
7203 }
7204 
7205 /*
7206  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7207  */
7208 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7209 {
7210 	int ret = 0;
7211 
7212 	mutex_lock(&trace_types_lock);
7213 
7214 	if (set && tr->no_filter_buffering_ref++)
7215 		goto out;
7216 
7217 	if (!set) {
7218 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7219 			ret = -EINVAL;
7220 			goto out;
7221 		}
7222 
7223 		--tr->no_filter_buffering_ref;
7224 	}
7225  out:
7226 	mutex_unlock(&trace_types_lock);
7227 
7228 	return ret;
7229 }
7230 
7231 struct ftrace_buffer_info {
7232 	struct trace_iterator	iter;
7233 	void			*spare;
7234 	unsigned int		spare_cpu;
7235 	unsigned int		spare_size;
7236 	unsigned int		read;
7237 };
7238 
7239 #ifdef CONFIG_TRACER_SNAPSHOT
7240 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7241 {
7242 	struct trace_array *tr = inode->i_private;
7243 	struct trace_iterator *iter;
7244 	struct seq_file *m;
7245 	int ret;
7246 
7247 	ret = tracing_check_open_get_tr(tr);
7248 	if (ret)
7249 		return ret;
7250 
7251 	if (file->f_mode & FMODE_READ) {
7252 		iter = __tracing_open(inode, file, true);
7253 		if (IS_ERR(iter))
7254 			ret = PTR_ERR(iter);
7255 	} else {
7256 		/* Writes still need the seq_file to hold the private data */
7257 		ret = -ENOMEM;
7258 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7259 		if (!m)
7260 			goto out;
7261 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7262 		if (!iter) {
7263 			kfree(m);
7264 			goto out;
7265 		}
7266 		ret = 0;
7267 
7268 		iter->tr = tr;
7269 		iter->array_buffer = &tr->max_buffer;
7270 		iter->cpu_file = tracing_get_cpu(inode);
7271 		m->private = iter;
7272 		file->private_data = m;
7273 	}
7274 out:
7275 	if (ret < 0)
7276 		trace_array_put(tr);
7277 
7278 	return ret;
7279 }
7280 
7281 static void tracing_swap_cpu_buffer(void *tr)
7282 {
7283 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7284 }
7285 
7286 static ssize_t
7287 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7288 		       loff_t *ppos)
7289 {
7290 	struct seq_file *m = filp->private_data;
7291 	struct trace_iterator *iter = m->private;
7292 	struct trace_array *tr = iter->tr;
7293 	unsigned long val;
7294 	int ret;
7295 
7296 	ret = tracing_update_buffers(tr);
7297 	if (ret < 0)
7298 		return ret;
7299 
7300 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7301 	if (ret)
7302 		return ret;
7303 
7304 	mutex_lock(&trace_types_lock);
7305 
7306 	if (tr->current_trace->use_max_tr) {
7307 		ret = -EBUSY;
7308 		goto out;
7309 	}
7310 
7311 	local_irq_disable();
7312 	arch_spin_lock(&tr->max_lock);
7313 	if (tr->cond_snapshot)
7314 		ret = -EBUSY;
7315 	arch_spin_unlock(&tr->max_lock);
7316 	local_irq_enable();
7317 	if (ret)
7318 		goto out;
7319 
7320 	switch (val) {
7321 	case 0:
7322 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7323 			ret = -EINVAL;
7324 			break;
7325 		}
7326 		if (tr->allocated_snapshot)
7327 			free_snapshot(tr);
7328 		break;
7329 	case 1:
7330 /* Only allow per-cpu swap if the ring buffer supports it */
7331 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7332 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7333 			ret = -EINVAL;
7334 			break;
7335 		}
7336 #endif
7337 		if (tr->allocated_snapshot)
7338 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7339 					&tr->array_buffer, iter->cpu_file);
7340 
7341 		ret = tracing_arm_snapshot_locked(tr);
7342 		if (ret)
7343 			break;
7344 
7345 		/* Now, we're going to swap */
7346 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7347 			local_irq_disable();
7348 			update_max_tr(tr, current, smp_processor_id(), NULL);
7349 			local_irq_enable();
7350 		} else {
7351 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7352 						 (void *)tr, 1);
7353 		}
7354 		tracing_disarm_snapshot(tr);
7355 		break;
7356 	default:
7357 		if (tr->allocated_snapshot) {
7358 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7359 				tracing_reset_online_cpus(&tr->max_buffer);
7360 			else
7361 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7362 		}
7363 		break;
7364 	}
7365 
7366 	if (ret >= 0) {
7367 		*ppos += cnt;
7368 		ret = cnt;
7369 	}
7370 out:
7371 	mutex_unlock(&trace_types_lock);
7372 	return ret;
7373 }
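
/*
 * Summary of the snapshot file semantics implemented above, written as
 * a usage sketch: writing 0 frees the snapshot buffer, writing 1
 * allocates it (if needed) and swaps it with the live buffer, and any
 * other value just clears the snapshot buffer.  Assuming tracefs is
 * mounted at /sys/kernel/tracing:
 *
 *   echo 1 > /sys/kernel/tracing/snapshot      # take a snapshot
 *   cat /sys/kernel/tracing/snapshot           # read it back
 *   echo 0 > /sys/kernel/tracing/snapshot      # free the buffer
 */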
7374 
7375 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7376 {
7377 	struct seq_file *m = file->private_data;
7378 	int ret;
7379 
7380 	ret = tracing_release(inode, file);
7381 
7382 	if (file->f_mode & FMODE_READ)
7383 		return ret;
7384 
7385 	/* If write only, the seq_file is just a stub */
7386 	if (m)
7387 		kfree(m->private);
7388 	kfree(m);
7389 
7390 	return 0;
7391 }
7392 
7393 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7394 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7395 				    size_t count, loff_t *ppos);
7396 static int tracing_buffers_release(struct inode *inode, struct file *file);
7397 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7398 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7399 
7400 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7401 {
7402 	struct ftrace_buffer_info *info;
7403 	int ret;
7404 
7405 	/* The following checks for tracefs lockdown */
7406 	ret = tracing_buffers_open(inode, filp);
7407 	if (ret < 0)
7408 		return ret;
7409 
7410 	info = filp->private_data;
7411 
7412 	if (info->iter.trace->use_max_tr) {
7413 		tracing_buffers_release(inode, filp);
7414 		return -EBUSY;
7415 	}
7416 
7417 	info->iter.snapshot = true;
7418 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7419 
7420 	return ret;
7421 }
7422 
7423 #endif /* CONFIG_TRACER_SNAPSHOT */
7424 
7425 
7426 static const struct file_operations tracing_thresh_fops = {
7427 	.open		= tracing_open_generic,
7428 	.read		= tracing_thresh_read,
7429 	.write		= tracing_thresh_write,
7430 	.llseek		= generic_file_llseek,
7431 };
7432 
7433 #ifdef CONFIG_TRACER_MAX_TRACE
7434 static const struct file_operations tracing_max_lat_fops = {
7435 	.open		= tracing_open_generic_tr,
7436 	.read		= tracing_max_lat_read,
7437 	.write		= tracing_max_lat_write,
7438 	.llseek		= generic_file_llseek,
7439 	.release	= tracing_release_generic_tr,
7440 };
7441 #endif
7442 
7443 static const struct file_operations set_tracer_fops = {
7444 	.open		= tracing_open_generic_tr,
7445 	.read		= tracing_set_trace_read,
7446 	.write		= tracing_set_trace_write,
7447 	.llseek		= generic_file_llseek,
7448 	.release	= tracing_release_generic_tr,
7449 };
7450 
7451 static const struct file_operations tracing_pipe_fops = {
7452 	.open		= tracing_open_pipe,
7453 	.poll		= tracing_poll_pipe,
7454 	.read		= tracing_read_pipe,
7455 	.splice_read	= tracing_splice_read_pipe,
7456 	.release	= tracing_release_pipe,
7457 };
7458 
7459 static const struct file_operations tracing_entries_fops = {
7460 	.open		= tracing_open_generic_tr,
7461 	.read		= tracing_entries_read,
7462 	.write		= tracing_entries_write,
7463 	.llseek		= generic_file_llseek,
7464 	.release	= tracing_release_generic_tr,
7465 };
7466 
7467 static const struct file_operations tracing_buffer_meta_fops = {
7468 	.open		= tracing_buffer_meta_open,
7469 	.read		= seq_read,
7470 	.llseek		= seq_lseek,
7471 	.release	= tracing_seq_release,
7472 };
7473 
7474 static const struct file_operations tracing_total_entries_fops = {
7475 	.open		= tracing_open_generic_tr,
7476 	.read		= tracing_total_entries_read,
7477 	.llseek		= generic_file_llseek,
7478 	.release	= tracing_release_generic_tr,
7479 };
7480 
7481 static const struct file_operations tracing_free_buffer_fops = {
7482 	.open		= tracing_open_generic_tr,
7483 	.write		= tracing_free_buffer_write,
7484 	.release	= tracing_free_buffer_release,
7485 };
7486 
7487 static const struct file_operations tracing_mark_fops = {
7488 	.open		= tracing_mark_open,
7489 	.write		= tracing_mark_write,
7490 	.release	= tracing_release_generic_tr,
7491 };
7492 
7493 static const struct file_operations tracing_mark_raw_fops = {
7494 	.open		= tracing_mark_open,
7495 	.write		= tracing_mark_raw_write,
7496 	.release	= tracing_release_generic_tr,
7497 };
7498 
7499 static const struct file_operations trace_clock_fops = {
7500 	.open		= tracing_clock_open,
7501 	.read		= seq_read,
7502 	.llseek		= seq_lseek,
7503 	.release	= tracing_single_release_tr,
7504 	.write		= tracing_clock_write,
7505 };
7506 
7507 static const struct file_operations trace_time_stamp_mode_fops = {
7508 	.open		= tracing_time_stamp_mode_open,
7509 	.read		= seq_read,
7510 	.llseek		= seq_lseek,
7511 	.release	= tracing_single_release_tr,
7512 };
7513 
7514 static const struct file_operations last_boot_fops = {
7515 	.open		= tracing_open_generic_tr,
7516 	.read		= tracing_last_boot_read,
7517 	.llseek		= generic_file_llseek,
7518 	.release	= tracing_release_generic_tr,
7519 };
7520 
7521 #ifdef CONFIG_TRACER_SNAPSHOT
7522 static const struct file_operations snapshot_fops = {
7523 	.open		= tracing_snapshot_open,
7524 	.read		= seq_read,
7525 	.write		= tracing_snapshot_write,
7526 	.llseek		= tracing_lseek,
7527 	.release	= tracing_snapshot_release,
7528 };
7529 
7530 static const struct file_operations snapshot_raw_fops = {
7531 	.open		= snapshot_raw_open,
7532 	.read		= tracing_buffers_read,
7533 	.release	= tracing_buffers_release,
7534 	.splice_read	= tracing_buffers_splice_read,
7535 };
7536 
7537 #endif /* CONFIG_TRACER_SNAPSHOT */
7538 
7539 /*
7540  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7541  * @filp: The active open file structure
7542  * @ubuf: The userspace provided buffer containing the value to write
7543  * @cnt: The number of bytes to read from @ubuf
7544  * @ppos: The current "file" position
7545  *
7546  * This function implements the write interface for a struct trace_min_max_param.
7547  * The filp->private_data must point to a trace_min_max_param structure that
7548  * defines where to write the value, the min and the max acceptable values,
7549  * and a lock to protect the write.
7550  */
7551 static ssize_t
7552 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7553 {
7554 	struct trace_min_max_param *param = filp->private_data;
7555 	u64 val;
7556 	int err;
7557 
7558 	if (!param)
7559 		return -EFAULT;
7560 
7561 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7562 	if (err)
7563 		return err;
7564 
7565 	if (param->lock)
7566 		mutex_lock(param->lock);
7567 
7568 	if (param->min && val < *param->min)
7569 		err = -EINVAL;
7570 
7571 	if (param->max && val > *param->max)
7572 		err = -EINVAL;
7573 
7574 	if (!err)
7575 		*param->val = val;
7576 
7577 	if (param->lock)
7578 		mutex_unlock(param->lock);
7579 
7580 	if (err)
7581 		return err;
7582 
7583 	return cnt;
7584 }
7585 
7586 /*
7587  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7588  * @filp: The active open file structure
7589  * @ubuf: The userspace provided buffer to read value into
7590  * @cnt: The maximum number of bytes to read
7591  * @ppos: The current "file" position
7592  *
7593  * This function implements the read interface for a struct trace_min_max_param.
7594  * The filp->private_data must point to a trace_min_max_param struct with valid
7595  * data.
7596  */
7597 static ssize_t
7598 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7599 {
7600 	struct trace_min_max_param *param = filp->private_data;
7601 	char buf[U64_STR_SIZE];
7602 	int len;
7603 	u64 val;
7604 
7605 	if (!param)
7606 		return -EFAULT;
7607 
7608 	val = *param->val;
7609 
7610 	if (cnt > sizeof(buf))
7611 		cnt = sizeof(buf);
7612 
7613 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7614 
7615 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7616 }
7617 
7618 const struct file_operations trace_min_max_fops = {
7619 	.open		= tracing_open_generic,
7620 	.read		= trace_min_max_read,
7621 	.write		= trace_min_max_write,
7622 };
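
/*
 * Minimal sketch of wiring up a u64 knob with trace_min_max_fops.  The
 * names my_val, my_min, my_max, my_lock and "my_knob" are made up for
 * illustration; only the struct fields used by the handlers above are
 * shown:
 *
 *   static u64 my_val, my_min = 1, my_max = 1000;
 *   static DEFINE_MUTEX(my_lock);
 *   static struct trace_min_max_param my_param = {
 *           .lock = &my_lock,
 *           .val  = &my_val,
 *           .min  = &my_min,
 *           .max  = &my_max,
 *   };
 *
 *   trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *                     &my_param, &trace_min_max_fops);
 */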
7623 
7624 #define TRACING_LOG_ERRS_MAX	8
7625 #define TRACING_LOG_LOC_MAX	128
7626 
7627 #define CMD_PREFIX "  Command: "
7628 
7629 struct err_info {
7630 	const char	**errs;	/* ptr to loc-specific array of err strings */
7631 	u8		type;	/* index into errs -> specific err string */
7632 	u16		pos;	/* caret position */
7633 	u64		ts;
7634 };
7635 
7636 struct tracing_log_err {
7637 	struct list_head	list;
7638 	struct err_info		info;
7639 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7640 	char			*cmd;                     /* what caused err */
7641 };
7642 
7643 static DEFINE_MUTEX(tracing_err_log_lock);
7644 
7645 static struct tracing_log_err *alloc_tracing_log_err(int len)
7646 {
7647 	struct tracing_log_err *err;
7648 
7649 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7650 	if (!err)
7651 		return ERR_PTR(-ENOMEM);
7652 
7653 	err->cmd = kzalloc(len, GFP_KERNEL);
7654 	if (!err->cmd) {
7655 		kfree(err);
7656 		return ERR_PTR(-ENOMEM);
7657 	}
7658 
7659 	return err;
7660 }
7661 
7662 static void free_tracing_log_err(struct tracing_log_err *err)
7663 {
7664 	kfree(err->cmd);
7665 	kfree(err);
7666 }
7667 
7668 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7669 						   int len)
7670 {
7671 	struct tracing_log_err *err;
7672 	char *cmd;
7673 
7674 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7675 		err = alloc_tracing_log_err(len);
7676 		if (PTR_ERR(err) != -ENOMEM)
7677 			tr->n_err_log_entries++;
7678 
7679 		return err;
7680 	}
7681 	cmd = kzalloc(len, GFP_KERNEL);
7682 	if (!cmd)
7683 		return ERR_PTR(-ENOMEM);
7684 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7685 	kfree(err->cmd);
7686 	err->cmd = cmd;
7687 	list_del(&err->list);
7688 
7689 	return err;
7690 }
7691 
7692 /**
7693  * err_pos - find the position of a string within a command for error careting
7694  * @cmd: The tracing command that caused the error
7695  * @str: The string to position the caret at within @cmd
7696  *
7697  * Finds the position of the first occurrence of @str within @cmd.  The
7698  * return value can be passed to tracing_log_err() for caret placement
7699  * within @cmd.
7700  *
7701  * Returns the index within @cmd of the first occurrence of @str or 0
7702  * if @str was not found.
7703  */
7704 unsigned int err_pos(char *cmd, const char *str)
7705 {
7706 	char *found;
7707 
7708 	if (WARN_ON(!strlen(cmd)))
7709 		return 0;
7710 
7711 	found = strstr(cmd, str);
7712 	if (found)
7713 		return found - cmd;
7714 
7715 	return 0;
7716 }
7717 
7718 /**
7719  * tracing_log_err - write an error to the tracing error log
7720  * @tr: The associated trace array for the error (NULL for top level array)
7721  * @loc: A string describing where the error occurred
7722  * @cmd: The tracing command that caused the error
7723  * @errs: The array of loc-specific static error strings
7724  * @type: The index into errs[], which produces the specific static err string
7725  * @pos: The position the caret should be placed in the cmd
7726  *
7727  * Writes an error into tracing/error_log of the form:
7728  *
7729  * <loc>: error: <text>
7730  *   Command: <cmd>
7731  *              ^
7732  *
7733  * tracing/error_log is a small log file containing the last
7734  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7735  * unless there has been a tracing error, and the error log can be
7736  * cleared and have its memory freed by writing the empty string in
7737  * cleared, and its memory freed, by writing the empty string to it
7738  * in truncation mode, i.e. echo > tracing/error_log.
7739  * NOTE: the @errs array along with the @type param are used to
7740  * produce a static error string - this string is not copied and saved
7741  * when the error is logged - only a pointer to it is saved.  See
7742  * existing callers for examples of how static strings are typically
7743  * defined for use with tracing_log_err().
7744  */
7745 void tracing_log_err(struct trace_array *tr,
7746 		     const char *loc, const char *cmd,
7747 		     const char **errs, u8 type, u16 pos)
7748 {
7749 	struct tracing_log_err *err;
7750 	int len = 0;
7751 
7752 	if (!tr)
7753 		tr = &global_trace;
7754 
7755 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7756 
7757 	mutex_lock(&tracing_err_log_lock);
7758 	err = get_tracing_log_err(tr, len);
7759 	if (PTR_ERR(err) == -ENOMEM) {
7760 		mutex_unlock(&tracing_err_log_lock);
7761 		return;
7762 	}
7763 
7764 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7765 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7766 
7767 	err->info.errs = errs;
7768 	err->info.type = type;
7769 	err->info.pos = pos;
7770 	err->info.ts = local_clock();
7771 
7772 	list_add_tail(&err->list, &tr->err_log);
7773 	mutex_unlock(&tracing_err_log_lock);
7774 }
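
/*
 * Hedged example of a tracing_log_err() call site, as suggested in the
 * comment above.  The subsystem name, command string and error table
 * are made up for illustration:
 *
 *   static const char *my_errs[] = { "Invalid field", "Duplicate name" };
 *   enum { MY_ERR_FIELD, MY_ERR_DUP };
 *
 *   tracing_log_err(tr, "my_subsys", cmd, my_errs, MY_ERR_DUP,
 *                   err_pos(cmd, bad_token));
 *
 * Only the pointer to the static string array is stored, so my_errs
 * must not be freed or modified after logging.
 */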
7775 
7776 static void clear_tracing_err_log(struct trace_array *tr)
7777 {
7778 	struct tracing_log_err *err, *next;
7779 
7780 	mutex_lock(&tracing_err_log_lock);
7781 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7782 		list_del(&err->list);
7783 		free_tracing_log_err(err);
7784 	}
7785 
7786 	tr->n_err_log_entries = 0;
7787 	mutex_unlock(&tracing_err_log_lock);
7788 }
7789 
7790 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7791 {
7792 	struct trace_array *tr = m->private;
7793 
7794 	mutex_lock(&tracing_err_log_lock);
7795 
7796 	return seq_list_start(&tr->err_log, *pos);
7797 }
7798 
7799 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7800 {
7801 	struct trace_array *tr = m->private;
7802 
7803 	return seq_list_next(v, &tr->err_log, pos);
7804 }
7805 
7806 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7807 {
7808 	mutex_unlock(&tracing_err_log_lock);
7809 }
7810 
7811 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7812 {
7813 	u16 i;
7814 
7815 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7816 		seq_putc(m, ' ');
7817 	for (i = 0; i < pos; i++)
7818 		seq_putc(m, ' ');
7819 	seq_puts(m, "^\n");
7820 }
7821 
7822 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7823 {
7824 	struct tracing_log_err *err = v;
7825 
7826 	if (err) {
7827 		const char *err_text = err->info.errs[err->info.type];
7828 		u64 sec = err->info.ts;
7829 		u32 nsec;
7830 
7831 		nsec = do_div(sec, NSEC_PER_SEC);
7832 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7833 			   err->loc, err_text);
7834 		seq_printf(m, "%s", err->cmd);
7835 		tracing_err_log_show_pos(m, err->info.pos);
7836 	}
7837 
7838 	return 0;
7839 }
7840 
7841 static const struct seq_operations tracing_err_log_seq_ops = {
7842 	.start  = tracing_err_log_seq_start,
7843 	.next   = tracing_err_log_seq_next,
7844 	.stop   = tracing_err_log_seq_stop,
7845 	.show   = tracing_err_log_seq_show
7846 };
7847 
7848 static int tracing_err_log_open(struct inode *inode, struct file *file)
7849 {
7850 	struct trace_array *tr = inode->i_private;
7851 	int ret = 0;
7852 
7853 	ret = tracing_check_open_get_tr(tr);
7854 	if (ret)
7855 		return ret;
7856 
7857 	/* If this file was opened for write, then erase contents */
7858 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7859 		clear_tracing_err_log(tr);
7860 
7861 	if (file->f_mode & FMODE_READ) {
7862 		ret = seq_open(file, &tracing_err_log_seq_ops);
7863 		if (!ret) {
7864 			struct seq_file *m = file->private_data;
7865 			m->private = tr;
7866 		} else {
7867 			trace_array_put(tr);
7868 		}
7869 	}
7870 	return ret;
7871 }
7872 
7873 static ssize_t tracing_err_log_write(struct file *file,
7874 				     const char __user *buffer,
7875 				     size_t count, loff_t *ppos)
7876 {
7877 	return count;
7878 }
7879 
7880 static int tracing_err_log_release(struct inode *inode, struct file *file)
7881 {
7882 	struct trace_array *tr = inode->i_private;
7883 
7884 	trace_array_put(tr);
7885 
7886 	if (file->f_mode & FMODE_READ)
7887 		seq_release(inode, file);
7888 
7889 	return 0;
7890 }
7891 
7892 static const struct file_operations tracing_err_log_fops = {
7893 	.open           = tracing_err_log_open,
7894 	.write		= tracing_err_log_write,
7895 	.read           = seq_read,
7896 	.llseek         = tracing_lseek,
7897 	.release        = tracing_err_log_release,
7898 };
7899 
7900 static int tracing_buffers_open(struct inode *inode, struct file *filp)
7901 {
7902 	struct trace_array *tr = inode->i_private;
7903 	struct ftrace_buffer_info *info;
7904 	int ret;
7905 
7906 	ret = tracing_check_open_get_tr(tr);
7907 	if (ret)
7908 		return ret;
7909 
7910 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
7911 	if (!info) {
7912 		trace_array_put(tr);
7913 		return -ENOMEM;
7914 	}
7915 
7916 	mutex_lock(&trace_types_lock);
7917 
7918 	info->iter.tr		= tr;
7919 	info->iter.cpu_file	= tracing_get_cpu(inode);
7920 	info->iter.trace	= tr->current_trace;
7921 	info->iter.array_buffer = &tr->array_buffer;
7922 	info->spare		= NULL;
7923 	/* Force reading ring buffer for first read */
7924 	info->read		= (unsigned int)-1;
7925 
7926 	filp->private_data = info;
7927 
7928 	tr->trace_ref++;
7929 
7930 	mutex_unlock(&trace_types_lock);
7931 
7932 	ret = nonseekable_open(inode, filp);
7933 	if (ret < 0)
7934 		trace_array_put(tr);
7935 
7936 	return ret;
7937 }
7938 
7939 static __poll_t
7940 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
7941 {
7942 	struct ftrace_buffer_info *info = filp->private_data;
7943 	struct trace_iterator *iter = &info->iter;
7944 
7945 	return trace_poll(iter, filp, poll_table);
7946 }
7947 
7948 static ssize_t
7949 tracing_buffers_read(struct file *filp, char __user *ubuf,
7950 		     size_t count, loff_t *ppos)
7951 {
7952 	struct ftrace_buffer_info *info = filp->private_data;
7953 	struct trace_iterator *iter = &info->iter;
7954 	void *trace_data;
7955 	int page_size;
7956 	ssize_t ret = 0;
7957 	ssize_t size;
7958 
7959 	if (!count)
7960 		return 0;
7961 
7962 #ifdef CONFIG_TRACER_MAX_TRACE
7963 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
7964 		return -EBUSY;
7965 #endif
7966 
7967 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
7968 
7969 	/* Make sure the spare matches the current sub buffer size */
7970 	if (info->spare) {
7971 		if (page_size != info->spare_size) {
7972 			ring_buffer_free_read_page(iter->array_buffer->buffer,
7973 						   info->spare_cpu, info->spare);
7974 			info->spare = NULL;
7975 		}
7976 	}
7977 
7978 	if (!info->spare) {
7979 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
7980 							  iter->cpu_file);
7981 		if (IS_ERR(info->spare)) {
7982 			ret = PTR_ERR(info->spare);
7983 			info->spare = NULL;
7984 		} else {
7985 			info->spare_cpu = iter->cpu_file;
7986 			info->spare_size = page_size;
7987 		}
7988 	}
7989 	if (!info->spare)
7990 		return ret;
7991 
7992 	/* Do we have previous read data to read? */
7993 	if (info->read < page_size)
7994 		goto read;
7995 
7996  again:
7997 	trace_access_lock(iter->cpu_file);
7998 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
7999 				    info->spare,
8000 				    count,
8001 				    iter->cpu_file, 0);
8002 	trace_access_unlock(iter->cpu_file);
8003 
8004 	if (ret < 0) {
8005 		if (trace_empty(iter) && !iter->closed) {
8006 			if ((filp->f_flags & O_NONBLOCK))
8007 				return -EAGAIN;
8008 
8009 			ret = wait_on_pipe(iter, 0);
8010 			if (ret)
8011 				return ret;
8012 
8013 			goto again;
8014 		}
8015 		return 0;
8016 	}
8017 
8018 	info->read = 0;
8019  read:
8020 	size = page_size - info->read;
8021 	if (size > count)
8022 		size = count;
8023 	trace_data = ring_buffer_read_page_data(info->spare);
8024 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8025 	if (ret == size)
8026 		return -EFAULT;
8027 
8028 	size -= ret;
8029 
8030 	*ppos += size;
8031 	info->read += size;
8032 
8033 	return size;
8034 }
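
/*
 * Rough user-space sketch for the read path above: the
 * per_cpu/cpuN/trace_pipe_raw file hands out raw ring-buffer data in
 * sub-buffer-sized binary chunks, so a full sub-buffer read is typical
 * (4096 bytes is assumed here; the real size comes from
 * ring_buffer_subbuf_size_get()).  Assuming tracefs is mounted at
 * /sys/kernel/tracing:
 *
 *   char page[4096];
 *   int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *                 O_RDONLY);
 *   ssize_t n = read(fd, page, sizeof(page));  // blocks unless O_NONBLOCK
 */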
8035 
8036 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8037 {
8038 	struct ftrace_buffer_info *info = file->private_data;
8039 	struct trace_iterator *iter = &info->iter;
8040 
8041 	iter->closed = true;
8042 	/* Make sure the waiters see the new wait_index */
8043 	(void)atomic_fetch_inc_release(&iter->wait_index);
8044 
8045 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8046 
8047 	return 0;
8048 }
8049 
8050 static int tracing_buffers_release(struct inode *inode, struct file *file)
8051 {
8052 	struct ftrace_buffer_info *info = file->private_data;
8053 	struct trace_iterator *iter = &info->iter;
8054 
8055 	mutex_lock(&trace_types_lock);
8056 
8057 	iter->tr->trace_ref--;
8058 
8059 	__trace_array_put(iter->tr);
8060 
8061 	if (info->spare)
8062 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8063 					   info->spare_cpu, info->spare);
8064 	kvfree(info);
8065 
8066 	mutex_unlock(&trace_types_lock);
8067 
8068 	return 0;
8069 }
8070 
8071 struct buffer_ref {
8072 	struct trace_buffer	*buffer;
8073 	void			*page;
8074 	int			cpu;
8075 	refcount_t		refcount;
8076 };
8077 
8078 static void buffer_ref_release(struct buffer_ref *ref)
8079 {
8080 	if (!refcount_dec_and_test(&ref->refcount))
8081 		return;
8082 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8083 	kfree(ref);
8084 }
8085 
8086 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8087 				    struct pipe_buffer *buf)
8088 {
8089 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8090 
8091 	buffer_ref_release(ref);
8092 	buf->private = 0;
8093 }
8094 
8095 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8096 				struct pipe_buffer *buf)
8097 {
8098 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8099 
8100 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8101 		return false;
8102 
8103 	refcount_inc(&ref->refcount);
8104 	return true;
8105 }
8106 
8107 /* Pipe buffer operations for a buffer. */
8108 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8109 	.release		= buffer_pipe_buf_release,
8110 	.get			= buffer_pipe_buf_get,
8111 };
8112 
8113 /*
8114  * Callback from splice_to_pipe(); releases the pages left in the spd
8115  * if we errored out while filling the pipe.
8116  */
8117 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8118 {
8119 	struct buffer_ref *ref =
8120 		(struct buffer_ref *)spd->partial[i].private;
8121 
8122 	buffer_ref_release(ref);
8123 	spd->partial[i].private = 0;
8124 }
8125 
8126 static ssize_t
8127 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8128 			    struct pipe_inode_info *pipe, size_t len,
8129 			    unsigned int flags)
8130 {
8131 	struct ftrace_buffer_info *info = file->private_data;
8132 	struct trace_iterator *iter = &info->iter;
8133 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8134 	struct page *pages_def[PIPE_DEF_BUFFERS];
8135 	struct splice_pipe_desc spd = {
8136 		.pages		= pages_def,
8137 		.partial	= partial_def,
8138 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8139 		.ops		= &buffer_pipe_buf_ops,
8140 		.spd_release	= buffer_spd_release,
8141 	};
8142 	struct buffer_ref *ref;
8143 	bool woken = false;
8144 	int page_size;
8145 	int entries, i;
8146 	ssize_t ret = 0;
8147 
8148 #ifdef CONFIG_TRACER_MAX_TRACE
8149 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8150 		return -EBUSY;
8151 #endif
8152 
8153 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8154 	if (*ppos & (page_size - 1))
8155 		return -EINVAL;
8156 
8157 	if (len & (page_size - 1)) {
8158 		if (len < page_size)
8159 			return -EINVAL;
8160 		len &= (~(page_size - 1));
8161 	}
8162 
8163 	if (splice_grow_spd(pipe, &spd))
8164 		return -ENOMEM;
8165 
8166  again:
8167 	trace_access_lock(iter->cpu_file);
8168 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8169 
8170 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8171 		struct page *page;
8172 		int r;
8173 
8174 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8175 		if (!ref) {
8176 			ret = -ENOMEM;
8177 			break;
8178 		}
8179 
8180 		refcount_set(&ref->refcount, 1);
8181 		ref->buffer = iter->array_buffer->buffer;
8182 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8183 		if (IS_ERR(ref->page)) {
8184 			ret = PTR_ERR(ref->page);
8185 			ref->page = NULL;
8186 			kfree(ref);
8187 			break;
8188 		}
8189 		ref->cpu = iter->cpu_file;
8190 
8191 		r = ring_buffer_read_page(ref->buffer, ref->page,
8192 					  len, iter->cpu_file, 1);
8193 		if (r < 0) {
8194 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8195 						   ref->page);
8196 			kfree(ref);
8197 			break;
8198 		}
8199 
8200 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8201 
8202 		spd.pages[i] = page;
8203 		spd.partial[i].len = page_size;
8204 		spd.partial[i].offset = 0;
8205 		spd.partial[i].private = (unsigned long)ref;
8206 		spd.nr_pages++;
8207 		*ppos += page_size;
8208 
8209 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8210 	}
8211 
8212 	trace_access_unlock(iter->cpu_file);
8213 	spd.nr_pages = i;
8214 
8215 	/* did we read anything? */
8216 	if (!spd.nr_pages) {
8217 
8218 		if (ret)
8219 			goto out;
8220 
8221 		if (woken)
8222 			goto out;
8223 
8224 		ret = -EAGAIN;
8225 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8226 			goto out;
8227 
8228 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8229 		if (ret)
8230 			goto out;
8231 
8232 		/* No need to wait after waking up when tracing is off */
8233 		if (!tracer_tracing_is_on(iter->tr))
8234 			goto out;
8235 
8236 		/* Iterate one more time to collect any new data then exit */
8237 		woken = true;
8238 
8239 		goto again;
8240 	}
8241 
8242 	ret = splice_to_pipe(pipe, &spd);
8243 out:
8244 	splice_shrink_spd(&spd);
8245 
8246 	return ret;
8247 }
8248 
8249 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8250 {
8251 	struct ftrace_buffer_info *info = file->private_data;
8252 	struct trace_iterator *iter = &info->iter;
8253 	int err;
8254 
8255 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8256 		if (!(file->f_flags & O_NONBLOCK)) {
8257 			err = ring_buffer_wait(iter->array_buffer->buffer,
8258 					       iter->cpu_file,
8259 					       iter->tr->buffer_percent,
8260 					       NULL, NULL);
8261 			if (err)
8262 				return err;
8263 		}
8264 
8265 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8266 						  iter->cpu_file);
8267 	} else if (cmd) {
8268 		return -ENOTTY;
8269 	}
8270 
8271 	/*
8272 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8273 	 * waiters
8274 	 */
8275 	mutex_lock(&trace_types_lock);
8276 
8277 	/* Make sure the waiters see the new wait_index */
8278 	(void)atomic_fetch_inc_release(&iter->wait_index);
8279 
8280 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8281 
8282 	mutex_unlock(&trace_types_lock);
8283 	return 0;
8284 }
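
/*
 * Sketch of the two ioctl cases handled above, from a user-space point
 * of view; fd is an open per_cpu/cpuN/trace_pipe_raw, typically after
 * mmap()ing it (see tracing_buffers_mmap() below):
 *
 *   ioctl(fd, TRACE_MMAP_IOCTL_GET_READER);  // advance the reader page,
 *                                            // waiting unless O_NONBLOCK
 *   ioctl(fd, 0);                            // wake up all waiters
 */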
8285 
8286 #ifdef CONFIG_TRACER_MAX_TRACE
8287 static int get_snapshot_map(struct trace_array *tr)
8288 {
8289 	int err = 0;
8290 
8291 	/*
8292 	 * Called with mmap_lock held. lockdep would be unhappy if we took
8293 	 * trace_types_lock here. Instead use the specific
8294 	 * snapshot_trigger_lock.
8295 	 */
8296 	spin_lock(&tr->snapshot_trigger_lock);
8297 
8298 	if (tr->snapshot || tr->mapped == UINT_MAX)
8299 		err = -EBUSY;
8300 	else
8301 		tr->mapped++;
8302 
8303 	spin_unlock(&tr->snapshot_trigger_lock);
8304 
8305 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8306 	if (tr->mapped == 1)
8307 		synchronize_rcu();
8308 
8309 	return err;
8310 
8311 }
8312 static void put_snapshot_map(struct trace_array *tr)
8313 {
8314 	spin_lock(&tr->snapshot_trigger_lock);
8315 	if (!WARN_ON(!tr->mapped))
8316 		tr->mapped--;
8317 	spin_unlock(&tr->snapshot_trigger_lock);
8318 }
8319 #else
8320 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8321 static inline void put_snapshot_map(struct trace_array *tr) { }
8322 #endif
8323 
8324 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8325 {
8326 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8327 	struct trace_iterator *iter = &info->iter;
8328 
8329 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8330 	put_snapshot_map(iter->tr);
8331 }
8332 
8333 static const struct vm_operations_struct tracing_buffers_vmops = {
8334 	.close		= tracing_buffers_mmap_close,
8335 };
8336 
8337 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8338 {
8339 	struct ftrace_buffer_info *info = filp->private_data;
8340 	struct trace_iterator *iter = &info->iter;
8341 	int ret = 0;
8342 
8343 	ret = get_snapshot_map(iter->tr);
8344 	if (ret)
8345 		return ret;
8346 
8347 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8348 	if (ret)
8349 		put_snapshot_map(iter->tr);
8350 
8351 	vma->vm_ops = &tracing_buffers_vmops;
8352 
8353 	return ret;
8354 }
8355 
8356 static const struct file_operations tracing_buffers_fops = {
8357 	.open		= tracing_buffers_open,
8358 	.read		= tracing_buffers_read,
8359 	.poll		= tracing_buffers_poll,
8360 	.release	= tracing_buffers_release,
8361 	.flush		= tracing_buffers_flush,
8362 	.splice_read	= tracing_buffers_splice_read,
8363 	.unlocked_ioctl = tracing_buffers_ioctl,
8364 	.mmap		= tracing_buffers_mmap,
8365 };
8366 
8367 static ssize_t
8368 tracing_stats_read(struct file *filp, char __user *ubuf,
8369 		   size_t count, loff_t *ppos)
8370 {
8371 	struct inode *inode = file_inode(filp);
8372 	struct trace_array *tr = inode->i_private;
8373 	struct array_buffer *trace_buf = &tr->array_buffer;
8374 	int cpu = tracing_get_cpu(inode);
8375 	struct trace_seq *s;
8376 	unsigned long cnt;
8377 	unsigned long long t;
8378 	unsigned long usec_rem;
8379 
8380 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8381 	if (!s)
8382 		return -ENOMEM;
8383 
8384 	trace_seq_init(s);
8385 
8386 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8387 	trace_seq_printf(s, "entries: %ld\n", cnt);
8388 
8389 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8390 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8391 
8392 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8393 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8394 
8395 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8396 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8397 
8398 	if (trace_clocks[tr->clock_id].in_ns) {
8399 		/* local or global for trace_clock */
8400 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8401 		usec_rem = do_div(t, USEC_PER_SEC);
8402 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8403 								t, usec_rem);
8404 
8405 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8406 		usec_rem = do_div(t, USEC_PER_SEC);
8407 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8408 	} else {
8409 		/* counter or tsc mode for trace_clock */
8410 		trace_seq_printf(s, "oldest event ts: %llu\n",
8411 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8412 
8413 		trace_seq_printf(s, "now ts: %llu\n",
8414 				ring_buffer_time_stamp(trace_buf->buffer));
8415 	}
8416 
8417 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8418 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8419 
8420 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8421 	trace_seq_printf(s, "read events: %ld\n", cnt);
8422 
8423 	count = simple_read_from_buffer(ubuf, count, ppos,
8424 					s->buffer, trace_seq_used(s));
8425 
8426 	kfree(s);
8427 
8428 	return count;
8429 }
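
/*
 * The per_cpu/cpuN/stats file produced by the handler above emits one
 * "key: value" line per counter, e.g. (values are illustrative only):
 *
 *   entries: 1024
 *   overrun: 0
 *   commit overrun: 0
 *   bytes: 53248
 *   oldest event ts:  1234.567890
 *   now ts:  1234.987654
 *   dropped events: 0
 *   read events: 512
 */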
8430 
8431 static const struct file_operations tracing_stats_fops = {
8432 	.open		= tracing_open_generic_tr,
8433 	.read		= tracing_stats_read,
8434 	.llseek		= generic_file_llseek,
8435 	.release	= tracing_release_generic_tr,
8436 };
8437 
8438 #ifdef CONFIG_DYNAMIC_FTRACE
8439 
8440 static ssize_t
8441 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8442 		  size_t cnt, loff_t *ppos)
8443 {
8444 	ssize_t ret;
8445 	char *buf;
8446 	int r;
8447 
8448 	/* 512 should be plenty to hold the amount needed */
8449 #define DYN_INFO_BUF_SIZE	512
8450 
8451 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8452 	if (!buf)
8453 		return -ENOMEM;
8454 
8455 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8456 		      "%ld pages:%ld groups: %ld\n"
8457 		      "ftrace boot update time = %llu (ns)\n"
8458 		      "ftrace module total update time = %llu (ns)\n",
8459 		      ftrace_update_tot_cnt,
8460 		      ftrace_number_of_pages,
8461 		      ftrace_number_of_groups,
8462 		      ftrace_update_time,
8463 		      ftrace_total_mod_time);
8464 
8465 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8466 	kfree(buf);
8467 	return ret;
8468 }
8469 
8470 static const struct file_operations tracing_dyn_info_fops = {
8471 	.open		= tracing_open_generic,
8472 	.read		= tracing_read_dyn_info,
8473 	.llseek		= generic_file_llseek,
8474 };
8475 #endif /* CONFIG_DYNAMIC_FTRACE */
8476 
8477 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8478 static void
8479 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8480 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8481 		void *data)
8482 {
8483 	tracing_snapshot_instance(tr);
8484 }
8485 
8486 static void
8487 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8488 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8489 		      void *data)
8490 {
8491 	struct ftrace_func_mapper *mapper = data;
8492 	long *count = NULL;
8493 
8494 	if (mapper)
8495 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8496 
8497 	if (count) {
8498 
8499 		if (*count <= 0)
8500 			return;
8501 
8502 		(*count)--;
8503 	}
8504 
8505 	tracing_snapshot_instance(tr);
8506 }
8507 
8508 static int
8509 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8510 		      struct ftrace_probe_ops *ops, void *data)
8511 {
8512 	struct ftrace_func_mapper *mapper = data;
8513 	long *count = NULL;
8514 
8515 	seq_printf(m, "%ps:", (void *)ip);
8516 
8517 	seq_puts(m, "snapshot");
8518 
8519 	if (mapper)
8520 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8521 
8522 	if (count)
8523 		seq_printf(m, ":count=%ld\n", *count);
8524 	else
8525 		seq_puts(m, ":unlimited\n");
8526 
8527 	return 0;
8528 }
8529 
8530 static int
8531 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8532 		     unsigned long ip, void *init_data, void **data)
8533 {
8534 	struct ftrace_func_mapper *mapper = *data;
8535 
8536 	if (!mapper) {
8537 		mapper = allocate_ftrace_func_mapper();
8538 		if (!mapper)
8539 			return -ENOMEM;
8540 		*data = mapper;
8541 	}
8542 
8543 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8544 }
8545 
8546 static void
8547 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8548 		     unsigned long ip, void *data)
8549 {
8550 	struct ftrace_func_mapper *mapper = data;
8551 
8552 	if (!ip) {
8553 		if (!mapper)
8554 			return;
8555 		free_ftrace_func_mapper(mapper, NULL);
8556 		return;
8557 	}
8558 
8559 	ftrace_func_mapper_remove_ip(mapper, ip);
8560 }
8561 
8562 static struct ftrace_probe_ops snapshot_probe_ops = {
8563 	.func			= ftrace_snapshot,
8564 	.print			= ftrace_snapshot_print,
8565 };
8566 
8567 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8568 	.func			= ftrace_count_snapshot,
8569 	.print			= ftrace_snapshot_print,
8570 	.init			= ftrace_snapshot_init,
8571 	.free			= ftrace_snapshot_free,
8572 };
8573 
8574 static int
8575 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8576 			       char *glob, char *cmd, char *param, int enable)
8577 {
8578 	struct ftrace_probe_ops *ops;
8579 	void *count = (void *)-1;
8580 	char *number;
8581 	int ret;
8582 
8583 	if (!tr)
8584 		return -ENODEV;
8585 
8586 	/* hash funcs only work with set_ftrace_filter */
8587 	if (!enable)
8588 		return -EINVAL;
8589 
8590 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8591 
8592 	if (glob[0] == '!') {
8593 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8594 		if (!ret)
8595 			tracing_disarm_snapshot(tr);
8596 
8597 		return ret;
8598 	}
8599 
8600 	if (!param)
8601 		goto out_reg;
8602 
8603 	number = strsep(&param, ":");
8604 
8605 	if (!strlen(number))
8606 		goto out_reg;
8607 
8608 	/*
8609 	 * We use the callback data field (which is a pointer)
8610 	 * as our counter.
8611 	 */
8612 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8613 	if (ret)
8614 		return ret;
8615 
8616  out_reg:
8617 	ret = tracing_arm_snapshot(tr);
8618 	if (ret < 0)
8619 		goto out;
8620 
8621 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8622 	if (ret < 0)
8623 		tracing_disarm_snapshot(tr);
8624  out:
8625 	return ret < 0 ? ret : 0;
8626 }
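
/*
 * Usage sketch for the "snapshot" ftrace command registered below; the
 * function name "schedule" is just an example.  Assuming tracefs is
 * mounted at /sys/kernel/tracing:
 *
 *   echo 'schedule:snapshot' > set_ftrace_filter    # snapshot on every hit
 *   echo 'schedule:snapshot:5' > set_ftrace_filter  # only the first 5 hits
 *   echo '!schedule:snapshot' > set_ftrace_filter   # remove the probe
 */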
8627 
8628 static struct ftrace_func_command ftrace_snapshot_cmd = {
8629 	.name			= "snapshot",
8630 	.func			= ftrace_trace_snapshot_callback,
8631 };
8632 
8633 static __init int register_snapshot_cmd(void)
8634 {
8635 	return register_ftrace_command(&ftrace_snapshot_cmd);
8636 }
8637 #else
8638 static inline __init int register_snapshot_cmd(void) { return 0; }
8639 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8640 
8641 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8642 {
8643 	if (WARN_ON(!tr->dir))
8644 		return ERR_PTR(-ENODEV);
8645 
8646 	/* Top directory uses NULL as the parent */
8647 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8648 		return NULL;
8649 
8650 	/* All sub buffers have a descriptor */
8651 	return tr->dir;
8652 }
8653 
8654 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8655 {
8656 	struct dentry *d_tracer;
8657 
8658 	if (tr->percpu_dir)
8659 		return tr->percpu_dir;
8660 
8661 	d_tracer = tracing_get_dentry(tr);
8662 	if (IS_ERR(d_tracer))
8663 		return NULL;
8664 
8665 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8666 
8667 	MEM_FAIL(!tr->percpu_dir,
8668 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8669 
8670 	return tr->percpu_dir;
8671 }
8672 
8673 static struct dentry *
8674 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8675 		      void *data, long cpu, const struct file_operations *fops)
8676 {
8677 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8678 
8679 	if (ret) /* See tracing_get_cpu() */
8680 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8681 	return ret;
8682 }
8683 
8684 static void
8685 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8686 {
8687 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8688 	struct dentry *d_cpu;
8689 	char cpu_dir[30]; /* 30 characters should be more than enough */
8690 
8691 	if (!d_percpu)
8692 		return;
8693 
8694 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8695 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8696 	if (!d_cpu) {
8697 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8698 		return;
8699 	}
8700 
8701 	/* per cpu trace_pipe */
8702 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8703 				tr, cpu, &tracing_pipe_fops);
8704 
8705 	/* per cpu trace */
8706 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8707 				tr, cpu, &tracing_fops);
8708 
8709 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8710 				tr, cpu, &tracing_buffers_fops);
8711 
8712 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8713 				tr, cpu, &tracing_stats_fops);
8714 
8715 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8716 				tr, cpu, &tracing_entries_fops);
8717 
8718 	if (tr->range_addr_start)
8719 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8720 				      tr, cpu, &tracing_buffer_meta_fops);
8721 #ifdef CONFIG_TRACER_SNAPSHOT
8722 	if (!tr->range_addr_start) {
8723 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8724 				      tr, cpu, &snapshot_fops);
8725 
8726 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8727 				      tr, cpu, &snapshot_raw_fops);
8728 	}
8729 #endif
8730 }
8731 
8732 #ifdef CONFIG_FTRACE_SELFTEST
8733 /* Let selftest have access to static functions in this file */
8734 #include "trace_selftest.c"
8735 #endif
8736 
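/*
 * File operations for the per-tracer option files under options/.
 * Each file represents one tracer_opt bit: reading returns "0" or "1",
 * and writing "0" or "1" clears or sets the bit via __set_tracer_option().
 */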
8737 static ssize_t
trace_options_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8738 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8739 			loff_t *ppos)
8740 {
8741 	struct trace_option_dentry *topt = filp->private_data;
8742 	char *buf;
8743 
8744 	if (topt->flags->val & topt->opt->bit)
8745 		buf = "1\n";
8746 	else
8747 		buf = "0\n";
8748 
8749 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8750 }
8751 
8752 static ssize_t
trace_options_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8753 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8754 			 loff_t *ppos)
8755 {
8756 	struct trace_option_dentry *topt = filp->private_data;
8757 	unsigned long val;
8758 	int ret;
8759 
8760 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8761 	if (ret)
8762 		return ret;
8763 
8764 	if (val != 0 && val != 1)
8765 		return -EINVAL;
8766 
8767 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8768 		mutex_lock(&trace_types_lock);
8769 		ret = __set_tracer_option(topt->tr, topt->flags,
8770 					  topt->opt, !val);
8771 		mutex_unlock(&trace_types_lock);
8772 		if (ret)
8773 			return ret;
8774 	}
8775 
8776 	*ppos += cnt;
8777 
8778 	return cnt;
8779 }
8780 
tracing_open_options(struct inode * inode,struct file * filp)8781 static int tracing_open_options(struct inode *inode, struct file *filp)
8782 {
8783 	struct trace_option_dentry *topt = inode->i_private;
8784 	int ret;
8785 
8786 	ret = tracing_check_open_get_tr(topt->tr);
8787 	if (ret)
8788 		return ret;
8789 
8790 	filp->private_data = inode->i_private;
8791 	return 0;
8792 }
8793 
tracing_release_options(struct inode * inode,struct file * file)8794 static int tracing_release_options(struct inode *inode, struct file *file)
8795 {
8796 	struct trace_option_dentry *topt = file->private_data;
8797 
8798 	trace_array_put(topt->tr);
8799 	return 0;
8800 }
8801 
8802 static const struct file_operations trace_options_fops = {
8803 	.open = tracing_open_options,
8804 	.read = trace_options_read,
8805 	.write = trace_options_write,
8806 	.llseek	= generic_file_llseek,
8807 	.release = tracing_release_options,
8808 };
8809 
8810 /*
8811  * In order to pass in both the trace_array descriptor as well as the index
8812  * to the flag that the trace option file represents, the trace_array
8813  * has a character array of trace_flags_index[], which holds the index
8814  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8815  * The address of this character array is passed to the flag option file
8816  * read/write callbacks.
8817  *
8818  * In order to extract both the index and the trace_array descriptor,
8819  * get_tr_index() uses the following algorithm.
8820  *
8821  *   idx = *ptr;
8822  *
8823  * As the pointer itself contains the address of the index (remember
8824  * index[1] == 1).
8825  *
8826  * Then, to get the trace_array descriptor, we subtract that index
8827  * from the ptr, which gets us to the start of the index array itself.
8828  *
8829  *   ptr - idx == &index[0]
8830  *
8831  * Then a simple container_of() from that pointer gets us to the
8832  * trace_array descriptor.
8833  */
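/*
 * Worked example (illustrative only, not built): the core option file
 * for flag bit 5 is created with data = &tr->trace_flags_index[5], and
 * that byte holds the value 5.  get_tr_index() then recovers both
 * pieces as:
 *
 *	idx  = *(unsigned char *)data;		(idx == 5)
 *	base = data - idx;			(base == &tr->trace_flags_index[0])
 *	tr   = container_of(base, struct trace_array, trace_flags_index);
 */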
get_tr_index(void * data,struct trace_array ** ptr,unsigned int * pindex)8834 static void get_tr_index(void *data, struct trace_array **ptr,
8835 			 unsigned int *pindex)
8836 {
8837 	*pindex = *(unsigned char *)data;
8838 
8839 	*ptr = container_of(data - *pindex, struct trace_array,
8840 			    trace_flags_index);
8841 }
8842 
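/*
 * File operations for the core trace option files (one file per
 * tr->trace_flags bit under options/).  The file's private data points
 * into tr->trace_flags_index[]; see the comment above get_tr_index().
 */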
8843 static ssize_t
trace_options_core_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)8844 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8845 			loff_t *ppos)
8846 {
8847 	void *tr_index = filp->private_data;
8848 	struct trace_array *tr;
8849 	unsigned int index;
8850 	char *buf;
8851 
8852 	get_tr_index(tr_index, &tr, &index);
8853 
8854 	if (tr->trace_flags & (1 << index))
8855 		buf = "1\n";
8856 	else
8857 		buf = "0\n";
8858 
8859 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8860 }
8861 
8862 static ssize_t
trace_options_core_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)8863 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8864 			 loff_t *ppos)
8865 {
8866 	void *tr_index = filp->private_data;
8867 	struct trace_array *tr;
8868 	unsigned int index;
8869 	unsigned long val;
8870 	int ret;
8871 
8872 	get_tr_index(tr_index, &tr, &index);
8873 
8874 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8875 	if (ret)
8876 		return ret;
8877 
8878 	if (val != 0 && val != 1)
8879 		return -EINVAL;
8880 
8881 	mutex_lock(&event_mutex);
8882 	mutex_lock(&trace_types_lock);
8883 	ret = set_tracer_flag(tr, 1 << index, val);
8884 	mutex_unlock(&trace_types_lock);
8885 	mutex_unlock(&event_mutex);
8886 
8887 	if (ret < 0)
8888 		return ret;
8889 
8890 	*ppos += cnt;
8891 
8892 	return cnt;
8893 }
8894 
8895 static const struct file_operations trace_options_core_fops = {
8896 	.open = tracing_open_generic,
8897 	.read = trace_options_core_read,
8898 	.write = trace_options_core_write,
8899 	.llseek = generic_file_llseek,
8900 };
8901 
trace_create_file(const char * name,umode_t mode,struct dentry * parent,void * data,const struct file_operations * fops)8902 struct dentry *trace_create_file(const char *name,
8903 				 umode_t mode,
8904 				 struct dentry *parent,
8905 				 void *data,
8906 				 const struct file_operations *fops)
8907 {
8908 	struct dentry *ret;
8909 
8910 	ret = tracefs_create_file(name, mode, parent, data, fops);
8911 	if (!ret)
8912 		pr_warn("Could not create tracefs '%s' entry\n", name);
8913 
8914 	return ret;
8915 }
8916 
8917 
trace_options_init_dentry(struct trace_array * tr)8918 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
8919 {
8920 	struct dentry *d_tracer;
8921 
8922 	if (tr->options)
8923 		return tr->options;
8924 
8925 	d_tracer = tracing_get_dentry(tr);
8926 	if (IS_ERR(d_tracer))
8927 		return NULL;
8928 
8929 	tr->options = tracefs_create_dir("options", d_tracer);
8930 	if (!tr->options) {
8931 		pr_warn("Could not create tracefs directory 'options'\n");
8932 		return NULL;
8933 	}
8934 
8935 	return tr->options;
8936 }
8937 
8938 static void
create_trace_option_file(struct trace_array * tr,struct trace_option_dentry * topt,struct tracer_flags * flags,struct tracer_opt * opt)8939 create_trace_option_file(struct trace_array *tr,
8940 			 struct trace_option_dentry *topt,
8941 			 struct tracer_flags *flags,
8942 			 struct tracer_opt *opt)
8943 {
8944 	struct dentry *t_options;
8945 
8946 	t_options = trace_options_init_dentry(tr);
8947 	if (!t_options)
8948 		return;
8949 
8950 	topt->flags = flags;
8951 	topt->opt = opt;
8952 	topt->tr = tr;
8953 
8954 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
8955 					t_options, topt, &trace_options_fops);
8956 
8957 }
8958 
8959 static void
create_trace_option_files(struct trace_array * tr,struct tracer * tracer)8960 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
8961 {
8962 	struct trace_option_dentry *topts;
8963 	struct trace_options *tr_topts;
8964 	struct tracer_flags *flags;
8965 	struct tracer_opt *opts;
8966 	int cnt;
8967 	int i;
8968 
8969 	if (!tracer)
8970 		return;
8971 
8972 	flags = tracer->flags;
8973 
8974 	if (!flags || !flags->opts)
8975 		return;
8976 
8977 	/*
8978 	 * If this is an instance, only create flags for tracers
8979 	 * the instance may have.
8980 	 */
8981 	if (!trace_ok_for_array(tracer, tr))
8982 		return;
8983 
8984 	for (i = 0; i < tr->nr_topts; i++) {
8985 		/* Make sure there are no duplicate flags. */
8986 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
8987 			return;
8988 	}
8989 
8990 	opts = flags->opts;
8991 
8992 	for (cnt = 0; opts[cnt].name; cnt++)
8993 		;
8994 
8995 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
8996 	if (!topts)
8997 		return;
8998 
8999 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9000 			    GFP_KERNEL);
9001 	if (!tr_topts) {
9002 		kfree(topts);
9003 		return;
9004 	}
9005 
9006 	tr->topts = tr_topts;
9007 	tr->topts[tr->nr_topts].tracer = tracer;
9008 	tr->topts[tr->nr_topts].topts = topts;
9009 	tr->nr_topts++;
9010 
9011 	for (cnt = 0; opts[cnt].name; cnt++) {
9012 		create_trace_option_file(tr, &topts[cnt], flags,
9013 					 &opts[cnt]);
9014 		MEM_FAIL(topts[cnt].entry == NULL,
9015 			  "Failed to create trace option: %s",
9016 			  opts[cnt].name);
9017 	}
9018 }
9019 
9020 static struct dentry *
create_trace_option_core_file(struct trace_array * tr,const char * option,long index)9021 create_trace_option_core_file(struct trace_array *tr,
9022 			      const char *option, long index)
9023 {
9024 	struct dentry *t_options;
9025 
9026 	t_options = trace_options_init_dentry(tr);
9027 	if (!t_options)
9028 		return NULL;
9029 
9030 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9031 				 (void *)&tr->trace_flags_index[index],
9032 				 &trace_options_core_fops);
9033 }
9034 
create_trace_options_dir(struct trace_array * tr)9035 static void create_trace_options_dir(struct trace_array *tr)
9036 {
9037 	struct dentry *t_options;
9038 	bool top_level = tr == &global_trace;
9039 	int i;
9040 
9041 	t_options = trace_options_init_dentry(tr);
9042 	if (!t_options)
9043 		return;
9044 
9045 	for (i = 0; trace_options[i]; i++) {
9046 		if (top_level ||
9047 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9048 			create_trace_option_core_file(tr, trace_options[i], i);
9049 	}
9050 }
9051 
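/*
 * "tracing_on" file: reading reports whether the ring buffer is
 * currently recording.  Writing 0 stops recording and calls the current
 * tracer's ->stop() callback; writing a non-zero value turns recording
 * back on and calls ->start().
 */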
9052 static ssize_t
rb_simple_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9053 rb_simple_read(struct file *filp, char __user *ubuf,
9054 	       size_t cnt, loff_t *ppos)
9055 {
9056 	struct trace_array *tr = filp->private_data;
9057 	char buf[64];
9058 	int r;
9059 
9060 	r = tracer_tracing_is_on(tr);
9061 	r = sprintf(buf, "%d\n", r);
9062 
9063 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9064 }
9065 
9066 static ssize_t
rb_simple_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9067 rb_simple_write(struct file *filp, const char __user *ubuf,
9068 		size_t cnt, loff_t *ppos)
9069 {
9070 	struct trace_array *tr = filp->private_data;
9071 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9072 	unsigned long val;
9073 	int ret;
9074 
9075 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9076 	if (ret)
9077 		return ret;
9078 
9079 	if (buffer) {
9080 		mutex_lock(&trace_types_lock);
9081 		if (!!val == tracer_tracing_is_on(tr)) {
9082 			val = 0; /* do nothing */
9083 		} else if (val) {
9084 			tracer_tracing_on(tr);
9085 			if (tr->current_trace->start)
9086 				tr->current_trace->start(tr);
9087 		} else {
9088 			tracer_tracing_off(tr);
9089 			if (tr->current_trace->stop)
9090 				tr->current_trace->stop(tr);
9091 			/* Wake up any waiters */
9092 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9093 		}
9094 		mutex_unlock(&trace_types_lock);
9095 	}
9096 
9097 	(*ppos)++;
9098 
9099 	return cnt;
9100 }
9101 
9102 static const struct file_operations rb_simple_fops = {
9103 	.open		= tracing_open_generic_tr,
9104 	.read		= rb_simple_read,
9105 	.write		= rb_simple_write,
9106 	.release	= tracing_release_generic_tr,
9107 	.llseek		= default_llseek,
9108 };
9109 
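/*
 * "buffer_percent" file: how full the ring buffer must be before a
 * blocked reader is woken up.  0 means wake up as soon as any data is
 * available; 100 means wait until the buffer is full.
 */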
9110 static ssize_t
buffer_percent_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9111 buffer_percent_read(struct file *filp, char __user *ubuf,
9112 		    size_t cnt, loff_t *ppos)
9113 {
9114 	struct trace_array *tr = filp->private_data;
9115 	char buf[64];
9116 	int r;
9117 
9118 	r = tr->buffer_percent;
9119 	r = sprintf(buf, "%d\n", r);
9120 
9121 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9122 }
9123 
9124 static ssize_t
buffer_percent_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9125 buffer_percent_write(struct file *filp, const char __user *ubuf,
9126 		     size_t cnt, loff_t *ppos)
9127 {
9128 	struct trace_array *tr = filp->private_data;
9129 	unsigned long val;
9130 	int ret;
9131 
9132 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9133 	if (ret)
9134 		return ret;
9135 
9136 	if (val > 100)
9137 		return -EINVAL;
9138 
9139 	tr->buffer_percent = val;
9140 
9141 	(*ppos)++;
9142 
9143 	return cnt;
9144 }
9145 
9146 static const struct file_operations buffer_percent_fops = {
9147 	.open		= tracing_open_generic_tr,
9148 	.read		= buffer_percent_read,
9149 	.write		= buffer_percent_write,
9150 	.release	= tracing_release_generic_tr,
9151 	.llseek		= default_llseek,
9152 };
9153 
9154 static ssize_t
buffer_subbuf_size_read(struct file * filp,char __user * ubuf,size_t cnt,loff_t * ppos)9155 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9156 {
9157 	struct trace_array *tr = filp->private_data;
9158 	size_t size;
9159 	char buf[64];
9160 	int order;
9161 	int r;
9162 
9163 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9164 	size = (PAGE_SIZE << order) / 1024;
9165 
9166 	r = sprintf(buf, "%zd\n", size);
9167 
9168 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9169 }
9170 
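/*
 * "buffer_subbuf_size_kb" file: the size of each ring buffer sub-buffer.
 * Reads report the current size in KB; writes take a size in KB and
 * round it up to a power-of-two number of system pages (1 to 128 pages).
 */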
9171 static ssize_t
buffer_subbuf_size_write(struct file * filp,const char __user * ubuf,size_t cnt,loff_t * ppos)9172 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9173 			 size_t cnt, loff_t *ppos)
9174 {
9175 	struct trace_array *tr = filp->private_data;
9176 	unsigned long val;
9177 	int old_order;
9178 	int order;
9179 	int pages;
9180 	int ret;
9181 
9182 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9183 	if (ret)
9184 		return ret;
9185 
9186 	val *= 1024; /* value passed in is in KB */
9187 
9188 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9189 	order = fls(pages - 1);
9190 
9191 	/* limit between 1 and 128 system pages */
9192 	if (order < 0 || order > 7)
9193 		return -EINVAL;
9194 
9195 	/* Do not allow tracing while changing the order of the ring buffer */
9196 	tracing_stop_tr(tr);
9197 
9198 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9199 	if (old_order == order)
9200 		goto out;
9201 
9202 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9203 	if (ret)
9204 		goto out;
9205 
9206 #ifdef CONFIG_TRACER_MAX_TRACE
9207 
9208 	if (!tr->allocated_snapshot)
9209 		goto out_max;
9210 
9211 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9212 	if (ret) {
9213 		/* Put back the old order */
9214 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9215 		if (WARN_ON_ONCE(cnt)) {
9216 			/*
9217 			 * AARGH! We are left with different orders!
9218 			 * The max buffer is our "snapshot" buffer.
9219 			 * When a tracer needs a snapshot (one of the
9220 			 * latency tracers), it swaps the max buffer
9221 			 * with the saved snapshot. We succeeded in
9222 			 * updating the order of the main buffer, but failed to
9223 			 * update the order of the max buffer. But when we tried
9224 			 * to reset the main buffer to the original order, we
9225 			 * failed there too. This is very unlikely to
9226 			 * happen, but if it does, warn and kill all
9227 			 * tracing.
9228 			 */
9229 			tracing_disabled = 1;
9230 		}
9231 		goto out;
9232 	}
9233  out_max:
9234 #endif
9235 	(*ppos)++;
9236  out:
9237 	if (ret)
9238 		cnt = ret;
9239 	tracing_start_tr(tr);
9240 	return cnt;
9241 }
9242 
9243 static const struct file_operations buffer_subbuf_size_fops = {
9244 	.open		= tracing_open_generic_tr,
9245 	.read		= buffer_subbuf_size_read,
9246 	.write		= buffer_subbuf_size_write,
9247 	.release	= tracing_release_generic_tr,
9248 	.llseek		= default_llseek,
9249 };
9250 
9251 static struct dentry *trace_instance_dir;
9252 
9253 static void
9254 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9255 
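/*
 * Allocate the ring buffer and per-CPU data for one array_buffer.
 * If the trace array was given a physical address range (a boot mapped
 * buffer), the ring buffer is placed in that range with
 * ring_buffer_alloc_range() instead of being allocated from normal
 * kernel memory.
 */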
9256 static int
allocate_trace_buffer(struct trace_array * tr,struct array_buffer * buf,int size)9257 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9258 {
9259 	enum ring_buffer_flags rb_flags;
9260 
9261 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9262 
9263 	buf->tr = tr;
9264 
9265 	if (tr->range_addr_start && tr->range_addr_size) {
9266 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9267 						      tr->range_addr_start,
9268 						      tr->range_addr_size);
9269 
9270 		ring_buffer_last_boot_delta(buf->buffer,
9271 					    &tr->text_delta, &tr->data_delta);
9272 		/*
9273 		 * This is basically the same as a mapped buffer,
9274 		 * with the same restrictions.
9275 		 */
9276 		tr->mapped++;
9277 	} else {
9278 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9279 	}
9280 	if (!buf->buffer)
9281 		return -ENOMEM;
9282 
9283 	buf->data = alloc_percpu(struct trace_array_cpu);
9284 	if (!buf->data) {
9285 		ring_buffer_free(buf->buffer);
9286 		buf->buffer = NULL;
9287 		return -ENOMEM;
9288 	}
9289 
9290 	/* Allocate the first page for all buffers */
9291 	set_buffer_entries(&tr->array_buffer,
9292 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9293 
9294 	return 0;
9295 }
9296 
free_trace_buffer(struct array_buffer * buf)9297 static void free_trace_buffer(struct array_buffer *buf)
9298 {
9299 	if (buf->buffer) {
9300 		ring_buffer_free(buf->buffer);
9301 		buf->buffer = NULL;
9302 		free_percpu(buf->data);
9303 		buf->data = NULL;
9304 	}
9305 }
9306 
allocate_trace_buffers(struct trace_array * tr,int size)9307 static int allocate_trace_buffers(struct trace_array *tr, int size)
9308 {
9309 	int ret;
9310 
9311 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9312 	if (ret)
9313 		return ret;
9314 
9315 #ifdef CONFIG_TRACER_MAX_TRACE
9316 	/* Fixed (boot mapped) buffer trace arrays do not have snapshot buffers */
9317 	if (tr->range_addr_start)
9318 		return 0;
9319 
9320 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9321 				    allocate_snapshot ? size : 1);
9322 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9323 		free_trace_buffer(&tr->array_buffer);
9324 		return -ENOMEM;
9325 	}
9326 	tr->allocated_snapshot = allocate_snapshot;
9327 
9328 	allocate_snapshot = false;
9329 #endif
9330 
9331 	return 0;
9332 }
9333 
free_trace_buffers(struct trace_array * tr)9334 static void free_trace_buffers(struct trace_array *tr)
9335 {
9336 	if (!tr)
9337 		return;
9338 
9339 	free_trace_buffer(&tr->array_buffer);
9340 
9341 #ifdef CONFIG_TRACER_MAX_TRACE
9342 	free_trace_buffer(&tr->max_buffer);
9343 #endif
9344 }
9345 
init_trace_flags_index(struct trace_array * tr)9346 static void init_trace_flags_index(struct trace_array *tr)
9347 {
9348 	int i;
9349 
9350 	/* Used by the trace options files */
9351 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9352 		tr->trace_flags_index[i] = i;
9353 }
9354 
__update_tracer_options(struct trace_array * tr)9355 static void __update_tracer_options(struct trace_array *tr)
9356 {
9357 	struct tracer *t;
9358 
9359 	for (t = trace_types; t; t = t->next)
9360 		add_tracer_options(tr, t);
9361 }
9362 
update_tracer_options(struct trace_array * tr)9363 static void update_tracer_options(struct trace_array *tr)
9364 {
9365 	mutex_lock(&trace_types_lock);
9366 	tracer_options_updated = true;
9367 	__update_tracer_options(tr);
9368 	mutex_unlock(&trace_types_lock);
9369 }
9370 
9371 /* Must have trace_types_lock held */
trace_array_find(const char * instance)9372 struct trace_array *trace_array_find(const char *instance)
9373 {
9374 	struct trace_array *tr, *found = NULL;
9375 
9376 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9377 		if (tr->name && strcmp(tr->name, instance) == 0) {
9378 			found = tr;
9379 			break;
9380 		}
9381 	}
9382 
9383 	return found;
9384 }
9385 
trace_array_find_get(const char * instance)9386 struct trace_array *trace_array_find_get(const char *instance)
9387 {
9388 	struct trace_array *tr;
9389 
9390 	mutex_lock(&trace_types_lock);
9391 	tr = trace_array_find(instance);
9392 	if (tr)
9393 		tr->ref++;
9394 	mutex_unlock(&trace_types_lock);
9395 
9396 	return tr;
9397 }
9398 
trace_array_create_dir(struct trace_array * tr)9399 static int trace_array_create_dir(struct trace_array *tr)
9400 {
9401 	int ret;
9402 
9403 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9404 	if (!tr->dir)
9405 		return -EINVAL;
9406 
9407 	ret = event_trace_add_tracer(tr->dir, tr);
9408 	if (ret) {
9409 		tracefs_remove(tr->dir);
9410 		return ret;
9411 	}
9412 
9413 	init_tracer_tracefs(tr, tr->dir);
9414 	__update_tracer_options(tr);
9415 
9416 	return ret;
9417 }
9418 
9419 static struct trace_array *
trace_array_create_systems(const char * name,const char * systems,unsigned long range_addr_start,unsigned long range_addr_size)9420 trace_array_create_systems(const char *name, const char *systems,
9421 			   unsigned long range_addr_start,
9422 			   unsigned long range_addr_size)
9423 {
9424 	struct trace_array *tr;
9425 	int ret;
9426 
9427 	ret = -ENOMEM;
9428 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9429 	if (!tr)
9430 		return ERR_PTR(ret);
9431 
9432 	tr->name = kstrdup(name, GFP_KERNEL);
9433 	if (!tr->name)
9434 		goto out_free_tr;
9435 
9436 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9437 		goto out_free_tr;
9438 
9439 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9440 		goto out_free_tr;
9441 
9442 	if (systems) {
9443 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9444 		if (!tr->system_names)
9445 			goto out_free_tr;
9446 	}
9447 
9448 	/* Only for boot up memory mapped ring buffers */
9449 	tr->range_addr_start = range_addr_start;
9450 	tr->range_addr_size = range_addr_size;
9451 
9452 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9453 
9454 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9455 
9456 	raw_spin_lock_init(&tr->start_lock);
9457 
9458 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9459 #ifdef CONFIG_TRACER_MAX_TRACE
9460 	spin_lock_init(&tr->snapshot_trigger_lock);
9461 #endif
9462 	tr->current_trace = &nop_trace;
9463 
9464 	INIT_LIST_HEAD(&tr->systems);
9465 	INIT_LIST_HEAD(&tr->events);
9466 	INIT_LIST_HEAD(&tr->hist_vars);
9467 	INIT_LIST_HEAD(&tr->err_log);
9468 
9469 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9470 		goto out_free_tr;
9471 
9472 	/* The ring buffer is expanded by default */
9473 	trace_set_ring_buffer_expanded(tr);
9474 
9475 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9476 		goto out_free_tr;
9477 
9478 	ftrace_init_trace_array(tr);
9479 
9480 	init_trace_flags_index(tr);
9481 
9482 	if (trace_instance_dir) {
9483 		ret = trace_array_create_dir(tr);
9484 		if (ret)
9485 			goto out_free_tr;
9486 	} else
9487 		__trace_early_add_events(tr);
9488 
9489 	list_add(&tr->list, &ftrace_trace_arrays);
9490 
9491 	tr->ref++;
9492 
9493 	return tr;
9494 
9495  out_free_tr:
9496 	ftrace_free_ftrace_ops(tr);
9497 	free_trace_buffers(tr);
9498 	free_cpumask_var(tr->pipe_cpumask);
9499 	free_cpumask_var(tr->tracing_cpumask);
9500 	kfree_const(tr->system_names);
9501 	kfree(tr->name);
9502 	kfree(tr);
9503 
9504 	return ERR_PTR(ret);
9505 }
9506 
trace_array_create(const char * name)9507 static struct trace_array *trace_array_create(const char *name)
9508 {
9509 	return trace_array_create_systems(name, NULL, 0, 0);
9510 }
9511 
instance_mkdir(const char * name)9512 static int instance_mkdir(const char *name)
9513 {
9514 	struct trace_array *tr;
9515 	int ret;
9516 
9517 	mutex_lock(&event_mutex);
9518 	mutex_lock(&trace_types_lock);
9519 
9520 	ret = -EEXIST;
9521 	if (trace_array_find(name))
9522 		goto out_unlock;
9523 
9524 	tr = trace_array_create(name);
9525 
9526 	ret = PTR_ERR_OR_ZERO(tr);
9527 
9528 out_unlock:
9529 	mutex_unlock(&trace_types_lock);
9530 	mutex_unlock(&event_mutex);
9531 	return ret;
9532 }
9533 
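/*
 * Map a physically contiguous range (such as one set aside with
 * reserve_mem) into the kernel's virtual address space with vmap() so
 * it can back a boot mapped ring buffer.  Returns the virtual address,
 * or 0 on failure.
 */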
map_pages(u64 start,u64 size)9534 static u64 map_pages(u64 start, u64 size)
9535 {
9536 	struct page **pages;
9537 	phys_addr_t page_start;
9538 	unsigned int page_count;
9539 	unsigned int i;
9540 	void *vaddr;
9541 
9542 	page_count = DIV_ROUND_UP(size, PAGE_SIZE);
9543 
9544 	page_start = start;
9545 	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
9546 	if (!pages)
9547 		return 0;
9548 
9549 	for (i = 0; i < page_count; i++) {
9550 		phys_addr_t addr = page_start + i * PAGE_SIZE;
9551 		pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
9552 	}
9553 	vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
9554 	kfree(pages);
9555 
9556 	return (u64)(unsigned long)vaddr;
9557 }
9558 
9559 /**
9560  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9561  * @name: The name of the trace array to be looked up/created.
9562  * @systems: A list of systems to create event directories for (NULL for all)
9563  *
9564  * Returns pointer to trace array with given name.
9565  * NULL, if it cannot be created.
9566  *
9567  * NOTE: This function increments the reference counter associated with the
9568  * trace array returned. This makes sure it cannot be freed while in use.
9569  * Use trace_array_put() once the trace array is no longer needed.
9570  * If the trace_array is to be freed, trace_array_destroy() needs to
9571  * be called after the trace_array_put(), or simply let user space delete
9572  * it from the tracefs instances directory. But until the
9573  * trace_array_put() is called, user space can not delete it.
9574  *
9575  */
trace_array_get_by_name(const char * name,const char * systems)9576 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9577 {
9578 	struct trace_array *tr;
9579 
9580 	mutex_lock(&event_mutex);
9581 	mutex_lock(&trace_types_lock);
9582 
9583 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9584 		if (tr->name && strcmp(tr->name, name) == 0)
9585 			goto out_unlock;
9586 	}
9587 
9588 	tr = trace_array_create_systems(name, systems, 0, 0);
9589 
9590 	if (IS_ERR(tr))
9591 		tr = NULL;
9592 out_unlock:
9593 	if (tr)
9594 		tr->ref++;
9595 
9596 	mutex_unlock(&trace_types_lock);
9597 	mutex_unlock(&event_mutex);
9598 	return tr;
9599 }
9600 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9601 
__remove_instance(struct trace_array * tr)9602 static int __remove_instance(struct trace_array *tr)
9603 {
9604 	int i;
9605 
9606 	/* Reference counter for a newly created trace array = 1. */
9607 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9608 		return -EBUSY;
9609 
9610 	list_del(&tr->list);
9611 
9612 	/* Disable all the flags that were enabled coming in */
9613 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9614 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9615 			set_tracer_flag(tr, 1 << i, 0);
9616 	}
9617 
9618 	if (printk_trace == tr)
9619 		update_printk_trace(&global_trace);
9620 
9621 	tracing_set_nop(tr);
9622 	clear_ftrace_function_probes(tr);
9623 	event_trace_del_tracer(tr);
9624 	ftrace_clear_pids(tr);
9625 	ftrace_destroy_function_files(tr);
9626 	tracefs_remove(tr->dir);
9627 	free_percpu(tr->last_func_repeats);
9628 	free_trace_buffers(tr);
9629 	clear_tracing_err_log(tr);
9630 
9631 	for (i = 0; i < tr->nr_topts; i++) {
9632 		kfree(tr->topts[i].topts);
9633 	}
9634 	kfree(tr->topts);
9635 
9636 	free_cpumask_var(tr->pipe_cpumask);
9637 	free_cpumask_var(tr->tracing_cpumask);
9638 	kfree_const(tr->system_names);
9639 	kfree(tr->name);
9640 	kfree(tr);
9641 
9642 	return 0;
9643 }
9644 
trace_array_destroy(struct trace_array * this_tr)9645 int trace_array_destroy(struct trace_array *this_tr)
9646 {
9647 	struct trace_array *tr;
9648 	int ret;
9649 
9650 	if (!this_tr)
9651 		return -EINVAL;
9652 
9653 	mutex_lock(&event_mutex);
9654 	mutex_lock(&trace_types_lock);
9655 
9656 	ret = -ENODEV;
9657 
9658 	/* Make sure the trace array exists before destroying it. */
9659 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9660 		if (tr == this_tr) {
9661 			ret = __remove_instance(tr);
9662 			break;
9663 		}
9664 	}
9665 
9666 	mutex_unlock(&trace_types_lock);
9667 	mutex_unlock(&event_mutex);
9668 
9669 	return ret;
9670 }
9671 EXPORT_SYMBOL_GPL(trace_array_destroy);
9672 
instance_rmdir(const char * name)9673 static int instance_rmdir(const char *name)
9674 {
9675 	struct trace_array *tr;
9676 	int ret;
9677 
9678 	mutex_lock(&event_mutex);
9679 	mutex_lock(&trace_types_lock);
9680 
9681 	ret = -ENODEV;
9682 	tr = trace_array_find(name);
9683 	if (tr)
9684 		ret = __remove_instance(tr);
9685 
9686 	mutex_unlock(&trace_types_lock);
9687 	mutex_unlock(&event_mutex);
9688 
9689 	return ret;
9690 }
9691 
create_trace_instances(struct dentry * d_tracer)9692 static __init void create_trace_instances(struct dentry *d_tracer)
9693 {
9694 	struct trace_array *tr;
9695 
9696 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9697 							 instance_mkdir,
9698 							 instance_rmdir);
9699 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9700 		return;
9701 
9702 	mutex_lock(&event_mutex);
9703 	mutex_lock(&trace_types_lock);
9704 
9705 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9706 		if (!tr->name)
9707 			continue;
9708 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9709 			     "Failed to create instance directory\n"))
9710 			break;
9711 	}
9712 
9713 	mutex_unlock(&trace_types_lock);
9714 	mutex_unlock(&event_mutex);
9715 }
9716 
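/*
 * Create the standard tracefs control files for @tr under @d_tracer:
 * available_tracers, current_tracer, trace, trace_pipe, buffer sizes,
 * trace_marker, the options directory, the per_cpu directories, etc.
 */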
9717 static void
init_tracer_tracefs(struct trace_array * tr,struct dentry * d_tracer)9718 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9719 {
9720 	int cpu;
9721 
9722 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9723 			tr, &show_traces_fops);
9724 
9725 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9726 			tr, &set_tracer_fops);
9727 
9728 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9729 			  tr, &tracing_cpumask_fops);
9730 
9731 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9732 			  tr, &tracing_iter_fops);
9733 
9734 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9735 			  tr, &tracing_fops);
9736 
9737 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9738 			  tr, &tracing_pipe_fops);
9739 
9740 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9741 			  tr, &tracing_entries_fops);
9742 
9743 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9744 			  tr, &tracing_total_entries_fops);
9745 
9746 	trace_create_file("free_buffer", 0200, d_tracer,
9747 			  tr, &tracing_free_buffer_fops);
9748 
9749 	trace_create_file("trace_marker", 0220, d_tracer,
9750 			  tr, &tracing_mark_fops);
9751 
9752 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9753 
9754 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9755 			  tr, &tracing_mark_raw_fops);
9756 
9757 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9758 			  &trace_clock_fops);
9759 
9760 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9761 			  tr, &rb_simple_fops);
9762 
9763 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9764 			  &trace_time_stamp_mode_fops);
9765 
9766 	tr->buffer_percent = 50;
9767 
9768 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9769 			tr, &buffer_percent_fops);
9770 
9771 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9772 			  tr, &buffer_subbuf_size_fops);
9773 
9774 	create_trace_options_dir(tr);
9775 
9776 #ifdef CONFIG_TRACER_MAX_TRACE
9777 	trace_create_maxlat_file(tr, d_tracer);
9778 #endif
9779 
9780 	if (ftrace_create_function_files(tr, d_tracer))
9781 		MEM_FAIL(1, "Could not allocate function filter files");
9782 
9783 	if (tr->range_addr_start) {
9784 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
9785 				  tr, &last_boot_fops);
9786 #ifdef CONFIG_TRACER_SNAPSHOT
9787 	} else {
9788 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9789 				  tr, &snapshot_fops);
9790 #endif
9791 	}
9792 
9793 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9794 			  tr, &tracing_err_log_fops);
9795 
9796 	for_each_tracing_cpu(cpu)
9797 		tracing_init_tracefs_percpu(tr, cpu);
9798 
9799 	ftrace_init_tracefs(tr, d_tracer);
9800 }
9801 
trace_automount(struct dentry * mntpt,void * ignore)9802 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
9803 {
9804 	struct vfsmount *mnt;
9805 	struct file_system_type *type;
9806 
9807 	/*
9808 	 * To maintain backward compatibility for tools that mount
9809 	 * debugfs to get to the tracing facility, tracefs is automatically
9810 	 * mounted to the debugfs/tracing directory.
9811 	 */
9812 	type = get_fs_type("tracefs");
9813 	if (!type)
9814 		return NULL;
9815 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9816 	put_filesystem(type);
9817 	if (IS_ERR(mnt))
9818 		return NULL;
9819 	mntget(mnt);
9820 
9821 	return mnt;
9822 }
9823 
9824 /**
9825  * tracing_init_dentry - initialize top level trace array
9826  *
9827  * This is called when creating files or directories in the tracing
9828  * directory. It is called via fs_initcall() by the boot up code and
9829  * returns 0 on success or a negative errno on failure.
9830  */
tracing_init_dentry(void)9831 int tracing_init_dentry(void)
9832 {
9833 	struct trace_array *tr = &global_trace;
9834 
9835 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9836 		pr_warn("Tracing disabled due to lockdown\n");
9837 		return -EPERM;
9838 	}
9839 
9840 	/* The top level trace array uses NULL as parent */
9841 	if (tr->dir)
9842 		return 0;
9843 
9844 	if (WARN_ON(!tracefs_initialized()))
9845 		return -ENODEV;
9846 
9847 	/*
9848 	 * As there may still be users that expect the tracing
9849 	 * files to exist in debugfs/tracing, we must automount
9850 	 * the tracefs file system there, so older tools still
9851 	 * work with the newer kernel.
9852 	 */
9853 	tr->dir = debugfs_create_automount("tracing", NULL,
9854 					   trace_automount, NULL);
9855 
9856 	return 0;
9857 }
9858 
9859 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9860 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9861 
9862 static struct workqueue_struct *eval_map_wq __initdata;
9863 static struct work_struct eval_map_work __initdata;
9864 static struct work_struct tracerfs_init_work __initdata;
9865 
eval_map_work_func(struct work_struct * work)9866 static void __init eval_map_work_func(struct work_struct *work)
9867 {
9868 	int len;
9869 
9870 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
9871 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
9872 }
9873 
trace_eval_init(void)9874 static int __init trace_eval_init(void)
9875 {
9876 	INIT_WORK(&eval_map_work, eval_map_work_func);
9877 
9878 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
9879 	if (!eval_map_wq) {
9880 		pr_err("Unable to allocate eval_map_wq\n");
9881 		/* Do work here */
9882 		eval_map_work_func(&eval_map_work);
9883 		return -ENOMEM;
9884 	}
9885 
9886 	queue_work(eval_map_wq, &eval_map_work);
9887 	return 0;
9888 }
9889 
9890 subsys_initcall(trace_eval_init);
9891 
trace_eval_sync(void)9892 static int __init trace_eval_sync(void)
9893 {
9894 	/* Make sure the eval map updates are finished */
9895 	if (eval_map_wq)
9896 		destroy_workqueue(eval_map_wq);
9897 	return 0;
9898 }
9899 
9900 late_initcall_sync(trace_eval_sync);
9901 
9902 
9903 #ifdef CONFIG_MODULES
trace_module_add_evals(struct module * mod)9904 static void trace_module_add_evals(struct module *mod)
9905 {
9906 	if (!mod->num_trace_evals)
9907 		return;
9908 
9909 	/*
9910 	 * Modules with bad taint do not have events created, so do
9911 	 * not bother with enums either.
9912 	 */
9913 	if (trace_module_has_bad_taint(mod))
9914 		return;
9915 
9916 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
9917 }
9918 
9919 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
trace_module_remove_evals(struct module * mod)9920 static void trace_module_remove_evals(struct module *mod)
9921 {
9922 	union trace_eval_map_item *map;
9923 	union trace_eval_map_item **last = &trace_eval_maps;
9924 
9925 	if (!mod->num_trace_evals)
9926 		return;
9927 
9928 	mutex_lock(&trace_eval_mutex);
9929 
9930 	map = trace_eval_maps;
9931 
9932 	while (map) {
9933 		if (map->head.mod == mod)
9934 			break;
9935 		map = trace_eval_jmp_to_tail(map);
9936 		last = &map->tail.next;
9937 		map = map->tail.next;
9938 	}
9939 	if (!map)
9940 		goto out;
9941 
9942 	*last = trace_eval_jmp_to_tail(map)->tail.next;
9943 	kfree(map);
9944  out:
9945 	mutex_unlock(&trace_eval_mutex);
9946 }
9947 #else
trace_module_remove_evals(struct module * mod)9948 static inline void trace_module_remove_evals(struct module *mod) { }
9949 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
9950 
trace_module_notify(struct notifier_block * self,unsigned long val,void * data)9951 static int trace_module_notify(struct notifier_block *self,
9952 			       unsigned long val, void *data)
9953 {
9954 	struct module *mod = data;
9955 
9956 	switch (val) {
9957 	case MODULE_STATE_COMING:
9958 		trace_module_add_evals(mod);
9959 		break;
9960 	case MODULE_STATE_GOING:
9961 		trace_module_remove_evals(mod);
9962 		break;
9963 	}
9964 
9965 	return NOTIFY_OK;
9966 }
9967 
9968 static struct notifier_block trace_module_nb = {
9969 	.notifier_call = trace_module_notify,
9970 	.priority = 0,
9971 };
9972 #endif /* CONFIG_MODULES */
9973 
tracer_init_tracefs_work_func(struct work_struct * work)9974 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
9975 {
9976 
9977 	event_trace_init();
9978 
9979 	init_tracer_tracefs(&global_trace, NULL);
9980 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
9981 
9982 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
9983 			&global_trace, &tracing_thresh_fops);
9984 
9985 	trace_create_file("README", TRACE_MODE_READ, NULL,
9986 			NULL, &tracing_readme_fops);
9987 
9988 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
9989 			NULL, &tracing_saved_cmdlines_fops);
9990 
9991 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
9992 			  NULL, &tracing_saved_cmdlines_size_fops);
9993 
9994 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
9995 			NULL, &tracing_saved_tgids_fops);
9996 
9997 	trace_create_eval_file(NULL);
9998 
9999 #ifdef CONFIG_MODULES
10000 	register_module_notifier(&trace_module_nb);
10001 #endif
10002 
10003 #ifdef CONFIG_DYNAMIC_FTRACE
10004 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10005 			NULL, &tracing_dyn_info_fops);
10006 #endif
10007 
10008 	create_trace_instances(NULL);
10009 
10010 	update_tracer_options(&global_trace);
10011 }
10012 
tracer_init_tracefs(void)10013 static __init int tracer_init_tracefs(void)
10014 {
10015 	int ret;
10016 
10017 	trace_access_lock_init();
10018 
10019 	ret = tracing_init_dentry();
10020 	if (ret)
10021 		return 0;
10022 
10023 	if (eval_map_wq) {
10024 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10025 		queue_work(eval_map_wq, &tracerfs_init_work);
10026 	} else {
10027 		tracer_init_tracefs_work_func(NULL);
10028 	}
10029 
10030 	rv_init_interface();
10031 
10032 	return 0;
10033 }
10034 
10035 fs_initcall(tracer_init_tracefs);
10036 
10037 static int trace_die_panic_handler(struct notifier_block *self,
10038 				unsigned long ev, void *unused);
10039 
10040 static struct notifier_block trace_panic_notifier = {
10041 	.notifier_call = trace_die_panic_handler,
10042 	.priority = INT_MAX - 1,
10043 };
10044 
10045 static struct notifier_block trace_die_notifier = {
10046 	.notifier_call = trace_die_panic_handler,
10047 	.priority = INT_MAX - 1,
10048 };
10049 
10050 /*
10051  * The idea is to execute the following die/panic callback early, in order
10052  * to avoid showing irrelevant information in the trace (like other panic
10053  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10054  * warnings get disabled (to prevent potential log flooding).
10055  */
trace_die_panic_handler(struct notifier_block * self,unsigned long ev,void * unused)10056 static int trace_die_panic_handler(struct notifier_block *self,
10057 				unsigned long ev, void *unused)
10058 {
10059 	if (!ftrace_dump_on_oops_enabled())
10060 		return NOTIFY_DONE;
10061 
10062 	/* The die notifier requires DIE_OOPS to trigger */
10063 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10064 		return NOTIFY_DONE;
10065 
10066 	ftrace_dump(DUMP_PARAM);
10067 
10068 	return NOTIFY_DONE;
10069 }
10070 
10071 /*
10072  * printk is set to a max of 1024, but we really don't need it that big.
10073  * Nothing should be printing 1000 characters anyway.
10074  */
10075 #define TRACE_MAX_PRINT		1000
10076 
10077 /*
10078  * Define here KERN_TRACE so that we have one place to modify
10079  * it if we decide to change what log level the ftrace dump
10080  * should be at.
10081  */
10082 #define KERN_TRACE		KERN_EMERG
10083 
10084 void
trace_printk_seq(struct trace_seq * s)10085 trace_printk_seq(struct trace_seq *s)
10086 {
10087 	/* Probably should print a warning here. */
10088 	if (s->seq.len >= TRACE_MAX_PRINT)
10089 		s->seq.len = TRACE_MAX_PRINT;
10090 
10091 	/*
10092 	 * More paranoid code. Although the buffer size is set to
10093 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10094 	 * an extra layer of protection.
10095 	 */
10096 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10097 		s->seq.len = s->seq.size - 1;
10098 
10099 	/* Should be nul terminated, but we are paranoid. */
10100 	s->buffer[s->seq.len] = 0;
10101 
10102 	printk(KERN_TRACE "%s", s->buffer);
10103 
10104 	trace_seq_init(s);
10105 }
10106 
trace_init_iter(struct trace_iterator * iter,struct trace_array * tr)10107 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10108 {
10109 	iter->tr = tr;
10110 	iter->trace = iter->tr->current_trace;
10111 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10112 	iter->array_buffer = &tr->array_buffer;
10113 
10114 	if (iter->trace && iter->trace->open)
10115 		iter->trace->open(iter);
10116 
10117 	/* Annotate start of buffers if we had overruns */
10118 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10119 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10120 
10121 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10122 	if (trace_clocks[iter->tr->clock_id].in_ns)
10123 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10124 
10125 	/* Can not use kmalloc for iter.temp and iter.fmt */
10126 	iter->temp = static_temp_buf;
10127 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10128 	iter->fmt = static_fmt_buf;
10129 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10130 }
10131 
trace_init_global_iter(struct trace_iterator * iter)10132 void trace_init_global_iter(struct trace_iterator *iter)
10133 {
10134 	trace_init_iter(iter, &global_trace);
10135 }
10136 
ftrace_dump_one(struct trace_array * tr,enum ftrace_dump_mode dump_mode)10137 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10138 {
10139 	/* use static because iter can be a bit big for the stack */
10140 	static struct trace_iterator iter;
10141 	unsigned int old_userobj;
10142 	unsigned long flags;
10143 	int cnt = 0, cpu;
10144 
10145 	/*
10146 	 * Always turn off tracing when we dump.
10147 	 * We don't need to show trace output of what happens
10148 	 * between multiple crashes.
10149 	 *
10150 	 * If the user does a sysrq-z, then they can re-enable
10151 	 * tracing with echo 1 > tracing_on.
10152 	 */
10153 	tracer_tracing_off(tr);
10154 
10155 	local_irq_save(flags);
10156 
10157 	/* Simulate the iterator */
10158 	trace_init_iter(&iter, tr);
10159 
10160 	for_each_tracing_cpu(cpu) {
10161 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10162 	}
10163 
10164 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10165 
10166 	/* don't look at user memory in panic mode */
10167 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10168 
10169 	if (dump_mode == DUMP_ORIG)
10170 		iter.cpu_file = raw_smp_processor_id();
10171 	else
10172 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10173 
10174 	if (tr == &global_trace)
10175 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10176 	else
10177 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10178 
10179 	/* Did function tracer already get disabled? */
10180 	if (ftrace_is_dead()) {
10181 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10182 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10183 	}
10184 
10185 	/*
10186 	 * We need to stop all tracing on all CPUs to read
10187 	 * the next buffer. This is a bit expensive, but is
10188 	 * not done often. We print everything we can read,
10189 	 * and then release the locks again.
10190 	 */
10191 
10192 	while (!trace_empty(&iter)) {
10193 
10194 		if (!cnt)
10195 			printk(KERN_TRACE "---------------------------------\n");
10196 
10197 		cnt++;
10198 
10199 		trace_iterator_reset(&iter);
10200 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10201 
10202 		if (trace_find_next_entry_inc(&iter) != NULL) {
10203 			int ret;
10204 
10205 			ret = print_trace_line(&iter);
10206 			if (ret != TRACE_TYPE_NO_CONSUME)
10207 				trace_consume(&iter);
10208 		}
10209 		touch_nmi_watchdog();
10210 
10211 		trace_printk_seq(&iter.seq);
10212 	}
10213 
10214 	if (!cnt)
10215 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10216 	else
10217 		printk(KERN_TRACE "---------------------------------\n");
10218 
10219 	tr->trace_flags |= old_userobj;
10220 
10221 	for_each_tracing_cpu(cpu) {
10222 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10223 	}
10224 	local_irq_restore(flags);
10225 }
10226 
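/*
 * Dump buffers according to the ftrace_dump_on_oops parameter, which may
 * name several instances.  For example (hypothetical instance names),
 * "1,foo,bar=orig_cpu" dumps the global buffer for all CPUs, all of
 * instance foo, and only the originating CPU of instance bar.
 */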
ftrace_dump_by_param(void)10227 static void ftrace_dump_by_param(void)
10228 {
10229 	bool first_param = true;
10230 	char dump_param[MAX_TRACER_SIZE];
10231 	char *buf, *token, *inst_name;
10232 	struct trace_array *tr;
10233 
10234 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10235 	buf = dump_param;
10236 
10237 	while ((token = strsep(&buf, ",")) != NULL) {
10238 		if (first_param) {
10239 			first_param = false;
10240 			if (!strcmp("0", token))
10241 				continue;
10242 			else if (!strcmp("1", token)) {
10243 				ftrace_dump_one(&global_trace, DUMP_ALL);
10244 				continue;
10245 			}
10246 			else if (!strcmp("2", token) ||
10247 			  !strcmp("orig_cpu", token)) {
10248 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10249 				continue;
10250 			}
10251 		}
10252 
10253 		inst_name = strsep(&token, "=");
10254 		tr = trace_array_find(inst_name);
10255 		if (!tr) {
10256 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10257 			continue;
10258 		}
10259 
10260 		if (token && (!strcmp("2", token) ||
10261 			  !strcmp("orig_cpu", token)))
10262 			ftrace_dump_one(tr, DUMP_ORIG);
10263 		else
10264 			ftrace_dump_one(tr, DUMP_ALL);
10265 	}
10266 }
10267 
ftrace_dump(enum ftrace_dump_mode oops_dump_mode)10268 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10269 {
10270 	static atomic_t dump_running;
10271 
10272 	/* Only allow one dump user at a time. */
10273 	if (atomic_inc_return(&dump_running) != 1) {
10274 		atomic_dec(&dump_running);
10275 		return;
10276 	}
10277 
10278 	switch (oops_dump_mode) {
10279 	case DUMP_ALL:
10280 		ftrace_dump_one(&global_trace, DUMP_ALL);
10281 		break;
10282 	case DUMP_ORIG:
10283 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10284 		break;
10285 	case DUMP_PARAM:
10286 		ftrace_dump_by_param();
10287 		break;
10288 	case DUMP_NONE:
10289 		break;
10290 	default:
10291 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10292 		ftrace_dump_one(&global_trace, DUMP_ALL);
10293 	}
10294 
10295 	atomic_dec(&dump_running);
10296 }
10297 EXPORT_SYMBOL_GPL(ftrace_dump);
10298 
10299 #define WRITE_BUFSIZE  4096
10300 
trace_parse_run_command(struct file * file,const char __user * buffer,size_t count,loff_t * ppos,int (* createfn)(const char *))10301 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10302 				size_t count, loff_t *ppos,
10303 				int (*createfn)(const char *))
10304 {
10305 	char *kbuf, *buf, *tmp;
10306 	int ret = 0;
10307 	size_t done = 0;
10308 	size_t size;
10309 
10310 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10311 	if (!kbuf)
10312 		return -ENOMEM;
10313 
10314 	while (done < count) {
10315 		size = count - done;
10316 
10317 		if (size >= WRITE_BUFSIZE)
10318 			size = WRITE_BUFSIZE - 1;
10319 
10320 		if (copy_from_user(kbuf, buffer + done, size)) {
10321 			ret = -EFAULT;
10322 			goto out;
10323 		}
10324 		kbuf[size] = '\0';
10325 		buf = kbuf;
10326 		do {
10327 			tmp = strchr(buf, '\n');
10328 			if (tmp) {
10329 				*tmp = '\0';
10330 				size = tmp - buf + 1;
10331 			} else {
10332 				size = strlen(buf);
10333 				if (done + size < count) {
10334 					if (buf != kbuf)
10335 						break;
10336 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10337 					pr_warn("Line length is too long: Should be less than %d\n",
10338 						WRITE_BUFSIZE - 2);
10339 					ret = -EINVAL;
10340 					goto out;
10341 				}
10342 			}
10343 			done += size;
10344 
10345 			/* Remove comments */
10346 			tmp = strchr(buf, '#');
10347 
10348 			if (tmp)
10349 				*tmp = '\0';
10350 
10351 			ret = createfn(buf);
10352 			if (ret)
10353 				goto out;
10354 			buf += size;
10355 
10356 		} while (done < count);
10357 	}
10358 	ret = done;
10359 
10360 out:
10361 	kfree(kbuf);
10362 
10363 	return ret;
10364 }
10365 
10366 #ifdef CONFIG_TRACER_MAX_TRACE
tr_needs_alloc_snapshot(const char * name)10367 __init static bool tr_needs_alloc_snapshot(const char *name)
10368 {
10369 	char *test;
10370 	int len = strlen(name);
10371 	bool ret;
10372 
10373 	if (!boot_snapshot_index)
10374 		return false;
10375 
10376 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10377 	    boot_snapshot_info[len] == '\t')
10378 		return true;
10379 
10380 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10381 	if (!test)
10382 		return false;
10383 
10384 	sprintf(test, "\t%s\t", name);
10385 	ret = strstr(boot_snapshot_info, test) == NULL;
10386 	kfree(test);
10387 	return ret;
10388 }
10389 
do_allocate_snapshot(const char * name)10390 __init static void do_allocate_snapshot(const char *name)
10391 {
10392 	if (!tr_needs_alloc_snapshot(name))
10393 		return;
10394 
10395 	/*
10396 	 * When allocate_snapshot is set, the next call to
10397 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10398 	 * will allocate the snapshot buffer. That will also clear
10399 	 * this flag.
10400 	 */
10401 	allocate_snapshot = true;
10402 }
10403 #else
do_allocate_snapshot(const char * name)10404 static inline void do_allocate_snapshot(const char *name) { }
10405 #endif
10406 
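/*
 * Create the trace instances requested on the kernel command line
 * (trace_instance=).  Each entry has the form (examples are
 * illustrative):
 *
 *	trace_instance=foo
 *	trace_instance=foo^traceoff^traceprintk,sched:sched_switch
 *	trace_instance=foo@0x1000000:2M
 *	trace_instance=foo@my_reserved_mem,irq
 *
 * '^' introduces flags, '@' a physical start:size or reserve_mem name
 * for a boot mapped buffer, and anything after the first ',' is a list
 * of events to enable in the new instance.
 */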
enable_instances(void)10407 __init static void enable_instances(void)
10408 {
10409 	struct trace_array *tr;
10410 	char *curr_str;
10411 	char *name;
10412 	char *str;
10413 	char *tok;
10414 
10415 	/* A tab is always appended */
10416 	boot_instance_info[boot_instance_index - 1] = '\0';
10417 	str = boot_instance_info;
10418 
10419 	while ((curr_str = strsep(&str, "\t"))) {
10420 		phys_addr_t start = 0;
10421 		phys_addr_t size = 0;
10422 		unsigned long addr = 0;
10423 		bool traceprintk = false;
10424 		bool traceoff = false;
10425 		char *flag_delim;
10426 		char *addr_delim;
10427 
10428 		tok = strsep(&curr_str, ",");
10429 
10430 		flag_delim = strchr(tok, '^');
10431 		addr_delim = strchr(tok, '@');
10432 
10433 		if (addr_delim)
10434 			*addr_delim++ = '\0';
10435 
10436 		if (flag_delim)
10437 			*flag_delim++ = '\0';
10438 
10439 		name = tok;
10440 
10441 		if (flag_delim) {
10442 			char *flag;
10443 
10444 			while ((flag = strsep(&flag_delim, "^"))) {
10445 				if (strcmp(flag, "traceoff") == 0) {
10446 					traceoff = true;
10447 				} else if ((strcmp(flag, "printk") == 0) ||
10448 					   (strcmp(flag, "traceprintk") == 0) ||
10449 					   (strcmp(flag, "trace_printk") == 0)) {
10450 					traceprintk = true;
10451 				} else {
10452 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10453 						flag, name);
10454 				}
10455 			}
10456 		}
10457 
10458 		tok = addr_delim;
10459 		if (tok && isdigit(*tok)) {
10460 			start = memparse(tok, &tok);
10461 			if (!start) {
10462 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10463 					name);
10464 				continue;
10465 			}
10466 			if (*tok != ':') {
10467 				pr_warn("Tracing: No size specified for instance %s\n", name);
10468 				continue;
10469 			}
10470 			tok++;
10471 			size = memparse(tok, &tok);
10472 			if (!size) {
10473 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10474 					name);
10475 				continue;
10476 			}
10477 		} else if (tok) {
10478 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10479 				start = 0;
10480 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10481 				continue;
10482 			}
10483 		}
10484 
10485 		if (start) {
10486 			addr = map_pages(start, size);
10487 			if (addr) {
10488 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10489 					name, &start, (unsigned long)size);
10490 			} else {
10491 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10492 				continue;
10493 			}
10494 		} else {
10495 			/* Only non-mapped buffers have snapshot buffers */
10496 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10497 				do_allocate_snapshot(name);
10498 		}
10499 
10500 		tr = trace_array_create_systems(name, NULL, addr, size);
10501 		if (IS_ERR(tr)) {
10502 			pr_warn("Tracing: Failed to create instance buffer %s\n", name);
10503 			continue;
10504 		}
10505 
10506 		if (traceoff)
10507 			tracer_tracing_off(tr);
10508 
10509 		if (traceprintk)
10510 			update_printk_trace(tr);
10511 
10512 		/*
10513 		 * If start is set, then this is a mapped buffer, and
10514 		 * cannot be deleted by user space, so keep the reference
10515 		 * to it.
10516 		 */
10517 		if (start) {
10518 			tr->flags |= TRACE_ARRAY_FL_BOOT;
10519 			tr->ref++;
10520 		}
10521 
10522 		while ((tok = strsep(&curr_str, ","))) {
10523 			early_enable_events(tr, tok, true);
10524 		}
10525 	}
10526 }
10527 
tracer_alloc_buffers(void)10528 __init static int tracer_alloc_buffers(void)
10529 {
10530 	int ring_buf_size;
10531 	int ret = -ENOMEM;
10532 
10533 
10534 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10535 		pr_warn("Tracing disabled due to lockdown\n");
10536 		return -EPERM;
10537 	}
10538 
10539 	/*
10540 	 * Make sure we don't accidentally add more trace options
10541 	 * than we have bits for.
10542 	 */
10543 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10544 
10545 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10546 		goto out;
10547 
10548 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10549 		goto out_free_buffer_mask;
10550 
10551 	/* Only allocate trace_printk buffers if a trace_printk exists */
10552 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10553 		/* Must be called before global_trace.buffer is allocated */
10554 		trace_printk_init_buffers();
10555 
10556 	/* To save memory, keep the ring buffer size at its minimum */
10557 	if (global_trace.ring_buffer_expanded)
10558 		ring_buf_size = trace_buf_size;
10559 	else
10560 		ring_buf_size = 1;
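	/*
	 * The dummy size of 1 keeps this initial allocation tiny; the buffer
	 * is expanded to trace_buf_size later, once tracing is actually used
	 * (see ring_buffer_expanded and tracing_update_buffers()).
	 */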
10561 
10562 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10563 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10564 
10565 	raw_spin_lock_init(&global_trace.start_lock);
10566 
10567 	/*
10568 	 * The prepare callback allocates some memory for the ring buffer. We
10569 	 * don't free the buffer if the CPU goes down. If we were to free
10570 	 * the buffer, then the user would lose any trace that was in the
10571 	 * buffer. The memory will be removed once the "instance" is removed.
10572 	 */
10573 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10574 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10575 				      NULL);
10576 	if (ret < 0)
10577 		goto out_free_cpumask;
10578 	/* Used for event triggers */
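	/*
	 * temp_buffer is a small scratch buffer: when tracing is off but an
	 * event has triggers attached, the event is staged here so that the
	 * trigger can still look at its data without anything being recorded
	 * in the real buffer (see trace_event_buffer_lock_reserve()).
	 */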
10579 	ret = -ENOMEM;
10580 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10581 	if (!temp_buffer)
10582 		goto out_rm_hp_state;
10583 
10584 	if (trace_create_savedcmd() < 0)
10585 		goto out_free_temp_buffer;
10586 
10587 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10588 		goto out_free_savedcmd;
10589 
10590 	/* TODO: make the number of buffers hotpluggable with CPUs */
10591 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10592 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10593 		goto out_free_pipe_cpumask;
10594 	}
10595 	if (global_trace.buffer_disabled)
10596 		tracing_off();
10597 
10598 	if (trace_boot_clock) {
10599 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10600 		if (ret < 0)
10601 			pr_warn("Trace clock %s not defined, going back to default\n",
10602 				trace_boot_clock);
10603 	}
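	/*
	 * trace_boot_clock comes from the "trace_clock=" kernel command line
	 * option, e.g. "trace_clock=global"; an unknown clock name simply
	 * falls back to the default, with the warning above.
	 */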
10604 
10605 	/*
10606 	 * register_tracer() might reference current_trace, so it
10607 	 * needs to be set before we register anything. This is
10608 	 * just a bootstrap of current_trace anyway.
10609 	 */
10610 	global_trace.current_trace = &nop_trace;
10611 
10612 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10613 #ifdef CONFIG_TRACER_MAX_TRACE
10614 	spin_lock_init(&global_trace.snapshot_trigger_lock);
10615 #endif
10616 	ftrace_init_global_array_ops(&global_trace);
10617 
10618 	init_trace_flags_index(&global_trace);
10619 
10620 	register_tracer(&nop_trace);
10621 
10622 	/* Function tracing may start here (via kernel command line) */
10623 	init_function_trace();
10624 
10625 	/* All seems OK, enable tracing */
10626 	tracing_disabled = 0;
10627 
10628 	atomic_notifier_chain_register(&panic_notifier_list,
10629 				       &trace_panic_notifier);
10630 
10631 	register_die_notifier(&trace_die_notifier);
10632 
10633 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10634 
10635 	INIT_LIST_HEAD(&global_trace.systems);
10636 	INIT_LIST_HEAD(&global_trace.events);
10637 	INIT_LIST_HEAD(&global_trace.hist_vars);
10638 	INIT_LIST_HEAD(&global_trace.err_log);
10639 	list_add(&global_trace.list, &ftrace_trace_arrays);
10640 
10641 	apply_trace_boot_options();
10642 
10643 	register_snapshot_cmd();
10644 
10645 	return 0;
10646 
10647 out_free_pipe_cpumask:
10648 	free_cpumask_var(global_trace.pipe_cpumask);
10649 out_free_savedcmd:
10650 	trace_free_saved_cmdlines_buffer();
10651 out_free_temp_buffer:
10652 	ring_buffer_free(temp_buffer);
10653 out_rm_hp_state:
10654 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10655 out_free_cpumask:
10656 	free_cpumask_var(global_trace.tracing_cpumask);
10657 out_free_buffer_mask:
10658 	free_cpumask_var(tracing_buffer_mask);
10659 out:
10660 	return ret;
10661 }
10662 
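/*
 * snapshot_at_boot is set by the "ftrace_boot_snapshot" kernel command line
 * option: late in boot, each instance that has a snapshot buffer allocated
 * gets a snapshot taken so the boot-time trace is preserved.
 */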
10663 void __init ftrace_boot_snapshot(void)
10664 {
10665 #ifdef CONFIG_TRACER_MAX_TRACE
10666 	struct trace_array *tr;
10667 
10668 	if (!snapshot_at_boot)
10669 		return;
10670 
10671 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10672 		if (!tr->allocated_snapshot)
10673 			continue;
10674 
10675 		tracing_snapshot_instance(tr);
10676 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10677 	}
10678 #endif
10679 }
10680 
10681 void __init early_trace_init(void)
10682 {
10683 	if (tracepoint_printk) {
10684 		tracepoint_print_iter =
10685 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10686 		if (MEM_FAIL(!tracepoint_print_iter,
10687 			     "Failed to allocate trace iterator\n"))
10688 			tracepoint_printk = 0;
10689 		else
10690 			static_key_enable(&tracepoint_printk_key.key);
10691 	}
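	/*
	 * tracepoint_printk is set by the "tp_printk" kernel command line
	 * option; when it is enabled, tracepoint output is echoed through
	 * printk() using the iterator allocated above.
	 */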
10692 	tracer_alloc_buffers();
10693 
10694 	init_events();
10695 }
10696 
10697 void __init trace_init(void)
10698 {
10699 	trace_event_init();
10700 
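	/*
	 * A non-zero boot_instance_index means at least one "trace_instance="
	 * option was seen on the kernel command line, so set those instances
	 * up now that the event infrastructure is initialized.
	 */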
10701 	if (boot_instance_index)
10702 		enable_instances();
10703 }
10704 
10705 __init static void clear_boot_tracer(void)
10706 {
10707 	/*
10708 	 * The default bootup tracer name lives in a buffer that is part of
10709 	 * an init section. This function runs at late init: if the requested
10710 	 * boot tracer was never registered, clear the pointer out so that a
10711 	 * later registration cannot access the buffer that is about to be
10712 	 * freed.
10713 	 */
10714 	if (!default_bootup_tracer)
10715 		return;
10716 
10717 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10718 	       default_bootup_tracer);
10719 	default_bootup_tracer = NULL;
10720 }
10721 
10722 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10723 __init static void tracing_set_default_clock(void)
10724 {
10725 	/* sched_clock_stable() is determined in late_initcall */
10726 	if (!trace_boot_clock && !sched_clock_stable()) {
10727 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10728 			pr_warn("Cannot set tracing clock due to lockdown\n");
10729 			return;
10730 		}
10731 
10732 		printk(KERN_WARNING
10733 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10734 		       "If you want to keep using the local clock, then add:\n"
10735 		       "  \"trace_clock=local\"\n"
10736 		       "on the kernel command line\n");
10737 		tracing_set_clock(&global_trace, "global");
10738 	}
10739 }
10740 #else
10741 static inline void tracing_set_default_clock(void) { }
10742 #endif
10743 
10744 __init static int late_trace_init(void)
10745 {
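	/*
	 * "tp_printk_stop_on_boot" on the kernel command line asks for the
	 * tp_printk behaviour to be turned off again once booting is done;
	 * that is handled here from a late initcall.
	 */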
10746 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10747 		static_key_disable(&tracepoint_printk_key.key);
10748 		tracepoint_printk = 0;
10749 	}
10750 
10751 	tracing_set_default_clock();
10752 	clear_boot_tracer();
10753 	return 0;
10754 }
10755 
10756 late_initcall_sync(late_trace_init);
10757