xref: /linux/kernel/trace/trace.c (revision eb7cca1faf9883d7b4da792281147dbedc449238)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <generated/utsrelease.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/kmemleak.h>
43 #include <linux/poll.h>
44 #include <linux/nmi.h>
45 #include <linux/fs.h>
46 #include <linux/trace.h>
47 #include <linux/sched/clock.h>
48 #include <linux/sched/rt.h>
49 #include <linux/fsnotify.h>
50 #include <linux/irq_work.h>
51 #include <linux/workqueue.h>
52 
53 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
54 
55 #include "trace.h"
56 #include "trace_output.h"
57 
58 #ifdef CONFIG_FTRACE_STARTUP_TEST
59 /*
60  * We need to change this state when a selftest is running.
61  * A selftest will look into the ring buffer to count the
62  * entries inserted during the selftest, although concurrent
63  * insertions into the ring buffer, such as trace_printk(), could occur
64  * at the same time, giving false positive or negative results.
65  */
66 static bool __read_mostly tracing_selftest_running;
67 
68 /*
69  * If boot-time tracing including tracers/events via kernel cmdline
70  * is running, we do not want to run SELFTEST.
71  */
72 bool __read_mostly tracing_selftest_disabled;
73 
74 void __init disable_tracing_selftest(const char *reason)
75 {
76 	if (!tracing_selftest_disabled) {
77 		tracing_selftest_disabled = true;
78 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
79 	}
80 }
81 #else
82 #define tracing_selftest_running	0
83 #define tracing_selftest_disabled	0
84 #endif
85 
86 /* Pipe tracepoints to printk */
87 static struct trace_iterator *tracepoint_print_iter;
88 int tracepoint_printk;
89 static bool tracepoint_printk_stop_on_boot __initdata;
90 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
91 
92 /* For tracers that don't implement custom flags */
93 static struct tracer_opt dummy_tracer_opt[] = {
94 	{ }
95 };
96 
97 static int
98 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
99 {
100 	return 0;
101 }
102 
103 /*
104  * To prevent the comm cache from being overwritten when no
105  * tracing is active, only save the comm when a trace event
106  * occurred.
107  */
108 static DEFINE_PER_CPU(bool, trace_taskinfo_save);
109 
110 /*
111  * Kill all tracing for good (never come back).
112  * It is initialized to 1 but will turn to zero if the initialization
113  * of the tracer is successful. But that is the only place that sets
114  * this back to zero.
115  */
116 static int tracing_disabled = 1;
117 
118 cpumask_var_t __read_mostly	tracing_buffer_mask;
119 
120 /*
121  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
122  *
123  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
124  * is set, then ftrace_dump is called. This will output the contents
125  * of the ftrace buffers to the console.  This is very useful for
126  * capturing traces that lead to crashes and outputting them to a
127  * serial console.
128  *
129  * It is off by default, but you can enable it either by specifying
130  * "ftrace_dump_on_oops" on the kernel command line, or by setting
131  * /proc/sys/kernel/ftrace_dump_on_oops.
132  * Set it to 1 to dump the buffers of all CPUs.
133  * Set it to 2 to dump only the buffer of the CPU that triggered the oops.
134  */
135 
136 enum ftrace_dump_mode ftrace_dump_on_oops;
137 
138 /* When set, tracing will stop when a WARN*() is hit */
139 int __disable_trace_on_warning;
140 
141 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
142 /* Map of enums to their values, for "eval_map" file */
143 struct trace_eval_map_head {
144 	struct module			*mod;
145 	unsigned long			length;
146 };
147 
148 union trace_eval_map_item;
149 
150 struct trace_eval_map_tail {
151 	/*
152 	 * "end" is first and points to NULL as it must be different
153 	 * than "mod" or "eval_string"
154 	 */
155 	union trace_eval_map_item	*next;
156 	const char			*end;	/* points to NULL */
157 };
158 
159 static DEFINE_MUTEX(trace_eval_mutex);
160 
161 /*
162  * The trace_eval_maps are saved in an array with two extra elements,
163  * one at the beginning, and one at the end. The beginning item contains
164  * the count of the saved maps (head.length), and the module they
165  * belong to if not built in (head.mod). The ending item contains a
166  * pointer to the next array of saved eval_map items.
167  */
168 union trace_eval_map_item {
169 	struct trace_eval_map		map;
170 	struct trace_eval_map_head	head;
171 	struct trace_eval_map_tail	tail;
172 };
173 
174 static union trace_eval_map_item *trace_eval_maps;
175 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
176 
177 int tracing_set_tracer(struct trace_array *tr, const char *buf);
178 static void ftrace_trace_userstack(struct trace_array *tr,
179 				   struct trace_buffer *buffer,
180 				   unsigned int trace_ctx);
181 
182 #define MAX_TRACER_SIZE		100
183 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
184 static char *default_bootup_tracer;
185 
186 static bool allocate_snapshot;
187 static bool snapshot_at_boot;
188 
189 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
190 static int boot_instance_index;
191 
192 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
193 static int boot_snapshot_index;
194 
195 static int __init set_cmdline_ftrace(char *str)
196 {
197 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
198 	default_bootup_tracer = bootup_tracer_buf;
199 	/* We are using ftrace early, expand it */
200 	trace_set_ring_buffer_expanded(NULL);
201 	return 1;
202 }
203 __setup("ftrace=", set_cmdline_ftrace);
204 
205 static int __init set_ftrace_dump_on_oops(char *str)
206 {
207 	if (*str++ != '=' || !*str || !strcmp("1", str)) {
208 		ftrace_dump_on_oops = DUMP_ALL;
209 		return 1;
210 	}
211 
212 	if (!strcmp("orig_cpu", str) || !strcmp("2", str)) {
213 		ftrace_dump_on_oops = DUMP_ORIG;
214 		return 1;
215 	}
216 
217 	return 0;
218 }
219 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
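
/*
 * Example usage: the dump mode can be selected at boot with
 * "ftrace_dump_on_oops" or "ftrace_dump_on_oops=orig_cpu" on the kernel
 * command line, or at run time with:
 *
 *	echo 1 > /proc/sys/kernel/ftrace_dump_on_oops
 *
 * where 1 dumps the buffers of all CPUs and 2 ("orig_cpu" on the command
 * line) dumps only the buffer of the CPU that triggered the oops.
 */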
220 
221 static int __init stop_trace_on_warning(char *str)
222 {
223 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
224 		__disable_trace_on_warning = 1;
225 	return 1;
226 }
227 __setup("traceoff_on_warning", stop_trace_on_warning);
228 
229 static int __init boot_alloc_snapshot(char *str)
230 {
231 	char *slot = boot_snapshot_info + boot_snapshot_index;
232 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
233 	int ret;
234 
235 	if (str[0] == '=') {
236 		str++;
237 		if (strlen(str) >= left)
238 			return -1;
239 
240 		ret = snprintf(slot, left, "%s\t", str);
241 		boot_snapshot_index += ret;
242 	} else {
243 		allocate_snapshot = true;
244 		/* We also need the main ring buffer expanded */
245 		trace_set_ring_buffer_expanded(NULL);
246 	}
247 	return 1;
248 }
249 __setup("alloc_snapshot", boot_alloc_snapshot);
250 
251 
252 static int __init boot_snapshot(char *str)
253 {
254 	snapshot_at_boot = true;
255 	boot_alloc_snapshot(str);
256 	return 1;
257 }
258 __setup("ftrace_boot_snapshot", boot_snapshot);
259 
260 
261 static int __init boot_instance(char *str)
262 {
263 	char *slot = boot_instance_info + boot_instance_index;
264 	int left = sizeof(boot_instance_info) - boot_instance_index;
265 	int ret;
266 
267 	if (strlen(str) >= left)
268 		return -1;
269 
270 	ret = snprintf(slot, left, "%s\t", str);
271 	boot_instance_index += ret;
272 
273 	return 1;
274 }
275 __setup("trace_instance=", boot_instance);
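
/*
 * Example usage: a tracing instance can be created from the kernel
 * command line, e.g.
 *
 *	trace_instance=foo
 *
 * which makes /sys/kernel/tracing/instances/foo available at boot.
 */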
276 
277 
278 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
279 
280 static int __init set_trace_boot_options(char *str)
281 {
282 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
283 	return 1;
284 }
285 __setup("trace_options=", set_trace_boot_options);
286 
287 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
288 static char *trace_boot_clock __initdata;
289 
290 static int __init set_trace_boot_clock(char *str)
291 {
292 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
293 	trace_boot_clock = trace_boot_clock_buf;
294 	return 1;
295 }
296 __setup("trace_clock=", set_trace_boot_clock);
297 
298 static int __init set_tracepoint_printk(char *str)
299 {
300 	/* Ignore the "tp_printk_stop_on_boot" param */
301 	if (*str == '_')
302 		return 0;
303 
304 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
305 		tracepoint_printk = 1;
306 	return 1;
307 }
308 __setup("tp_printk", set_tracepoint_printk);
309 
310 static int __init set_tracepoint_printk_stop(char *str)
311 {
312 	tracepoint_printk_stop_on_boot = true;
313 	return 1;
314 }
315 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
316 
317 unsigned long long ns2usecs(u64 nsec)
318 {
319 	nsec += 500;
320 	do_div(nsec, 1000);
321 	return nsec;
322 }
323 
324 static void
325 trace_process_export(struct trace_export *export,
326 	       struct ring_buffer_event *event, int flag)
327 {
328 	struct trace_entry *entry;
329 	unsigned int size = 0;
330 
331 	if (export->flags & flag) {
332 		entry = ring_buffer_event_data(event);
333 		size = ring_buffer_event_length(event);
334 		export->write(export, entry, size);
335 	}
336 }
337 
338 static DEFINE_MUTEX(ftrace_export_lock);
339 
340 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
341 
342 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
343 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
344 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
345 
346 static inline void ftrace_exports_enable(struct trace_export *export)
347 {
348 	if (export->flags & TRACE_EXPORT_FUNCTION)
349 		static_branch_inc(&trace_function_exports_enabled);
350 
351 	if (export->flags & TRACE_EXPORT_EVENT)
352 		static_branch_inc(&trace_event_exports_enabled);
353 
354 	if (export->flags & TRACE_EXPORT_MARKER)
355 		static_branch_inc(&trace_marker_exports_enabled);
356 }
357 
358 static inline void ftrace_exports_disable(struct trace_export *export)
359 {
360 	if (export->flags & TRACE_EXPORT_FUNCTION)
361 		static_branch_dec(&trace_function_exports_enabled);
362 
363 	if (export->flags & TRACE_EXPORT_EVENT)
364 		static_branch_dec(&trace_event_exports_enabled);
365 
366 	if (export->flags & TRACE_EXPORT_MARKER)
367 		static_branch_dec(&trace_marker_exports_enabled);
368 }
369 
370 static void ftrace_exports(struct ring_buffer_event *event, int flag)
371 {
372 	struct trace_export *export;
373 
374 	preempt_disable_notrace();
375 
376 	export = rcu_dereference_raw_check(ftrace_exports_list);
377 	while (export) {
378 		trace_process_export(export, event, flag);
379 		export = rcu_dereference_raw_check(export->next);
380 	}
381 
382 	preempt_enable_notrace();
383 }
384 
385 static inline void
386 add_trace_export(struct trace_export **list, struct trace_export *export)
387 {
388 	rcu_assign_pointer(export->next, *list);
389 	/*
390 	 * We are adding export into the list but another
391 	 * CPU might be walking that list. We need to make sure
392 	 * the export->next pointer is valid before another CPU sees
393 	 * the export pointer included in the list.
394 	 */
395 	rcu_assign_pointer(*list, export);
396 }
397 
398 static inline int
399 rm_trace_export(struct trace_export **list, struct trace_export *export)
400 {
401 	struct trace_export **p;
402 
403 	for (p = list; *p != NULL; p = &(*p)->next)
404 		if (*p == export)
405 			break;
406 
407 	if (*p != export)
408 		return -1;
409 
410 	rcu_assign_pointer(*p, (*p)->next);
411 
412 	return 0;
413 }
414 
415 static inline void
416 add_ftrace_export(struct trace_export **list, struct trace_export *export)
417 {
418 	ftrace_exports_enable(export);
419 
420 	add_trace_export(list, export);
421 }
422 
423 static inline int
424 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
425 {
426 	int ret;
427 
428 	ret = rm_trace_export(list, export);
429 	ftrace_exports_disable(export);
430 
431 	return ret;
432 }
433 
434 int register_ftrace_export(struct trace_export *export)
435 {
436 	if (WARN_ON_ONCE(!export->write))
437 		return -1;
438 
439 	mutex_lock(&ftrace_export_lock);
440 
441 	add_ftrace_export(&ftrace_exports_list, export);
442 
443 	mutex_unlock(&ftrace_export_lock);
444 
445 	return 0;
446 }
447 EXPORT_SYMBOL_GPL(register_ftrace_export);
448 
449 int unregister_ftrace_export(struct trace_export *export)
450 {
451 	int ret;
452 
453 	mutex_lock(&ftrace_export_lock);
454 
455 	ret = rm_ftrace_export(&ftrace_exports_list, export);
456 
457 	mutex_unlock(&ftrace_export_lock);
458 
459 	return ret;
460 }
461 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
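
/*
 * A minimal sketch of a trace_export user (the callback and variable
 * names are illustrative only; struct trace_export and the
 * TRACE_EXPORT_* flags come from <linux/trace.h>):
 *
 *	static void my_export_write(struct trace_export *export,
 *				    const void *entry, unsigned int size)
 *	{
 *		// forward @size bytes of @entry to an external sink
 *	}
 *
 *	static struct trace_export my_export = {
 *		.write	= my_export_write,
 *		.flags	= TRACE_EXPORT_EVENT,
 *	};
 *
 *	register_ftrace_export(&my_export);
 *	...
 *	unregister_ftrace_export(&my_export);
 */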
462 
463 /* trace_flags holds trace_options default values */
464 #define TRACE_DEFAULT_FLAGS						\
465 	(FUNCTION_DEFAULT_FLAGS |					\
466 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
467 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
468 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
469 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
470 	 TRACE_ITER_HASH_PTR)
471 
472 /* trace_options that are only supported by global_trace */
473 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
474 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
475 
476 /* trace_flags that are default zero for instances */
477 #define ZEROED_TRACE_FLAGS \
478 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK)
479 
480 /*
481  * The global_trace is the descriptor that holds the top-level tracing
482  * buffers for the live tracing.
483  */
484 static struct trace_array global_trace = {
485 	.trace_flags = TRACE_DEFAULT_FLAGS,
486 };
487 
488 void trace_set_ring_buffer_expanded(struct trace_array *tr)
489 {
490 	if (!tr)
491 		tr = &global_trace;
492 	tr->ring_buffer_expanded = true;
493 }
494 
495 LIST_HEAD(ftrace_trace_arrays);
496 
497 int trace_array_get(struct trace_array *this_tr)
498 {
499 	struct trace_array *tr;
500 	int ret = -ENODEV;
501 
502 	mutex_lock(&trace_types_lock);
503 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
504 		if (tr == this_tr) {
505 			tr->ref++;
506 			ret = 0;
507 			break;
508 		}
509 	}
510 	mutex_unlock(&trace_types_lock);
511 
512 	return ret;
513 }
514 
515 static void __trace_array_put(struct trace_array *this_tr)
516 {
517 	WARN_ON(!this_tr->ref);
518 	this_tr->ref--;
519 }
520 
521 /**
522  * trace_array_put - Decrement the reference counter for this trace array.
523  * @this_tr : pointer to the trace array
524  *
525  * NOTE: Use this when we no longer need the trace array returned by
526  * trace_array_get_by_name(). This ensures the trace array can be later
527  * destroyed.
528  *
529  */
530 void trace_array_put(struct trace_array *this_tr)
531 {
532 	if (!this_tr)
533 		return;
534 
535 	mutex_lock(&trace_types_lock);
536 	__trace_array_put(this_tr);
537 	mutex_unlock(&trace_types_lock);
538 }
539 EXPORT_SYMBOL_GPL(trace_array_put);
540 
541 int tracing_check_open_get_tr(struct trace_array *tr)
542 {
543 	int ret;
544 
545 	ret = security_locked_down(LOCKDOWN_TRACEFS);
546 	if (ret)
547 		return ret;
548 
549 	if (tracing_disabled)
550 		return -ENODEV;
551 
552 	if (tr && trace_array_get(tr) < 0)
553 		return -ENODEV;
554 
555 	return 0;
556 }
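
/*
 * A minimal sketch of how the tracefs open handlers in this file use the
 * check above (the handler name is illustrative):
 *
 *	static int example_open(struct inode *inode, struct file *filp)
 *	{
 *		struct trace_array *tr = inode->i_private;
 *		int ret;
 *
 *		ret = tracing_check_open_get_tr(tr);
 *		if (ret)
 *			return ret;
 *
 *		// ... do the actual open; call trace_array_put(tr) on
 *		// failure and when the file is released ...
 *
 *		return 0;
 *	}
 */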
557 
558 int call_filter_check_discard(struct trace_event_call *call, void *rec,
559 			      struct trace_buffer *buffer,
560 			      struct ring_buffer_event *event)
561 {
562 	if (unlikely(call->flags & TRACE_EVENT_FL_FILTERED) &&
563 	    !filter_match_preds(call->filter, rec)) {
564 		__trace_event_discard_commit(buffer, event);
565 		return 1;
566 	}
567 
568 	return 0;
569 }
570 
571 /**
572  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
573  * @filtered_pids: The list of pids to check
574  * @search_pid: The PID to find in @filtered_pids
575  *
576  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
577  */
578 bool
579 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
580 {
581 	return trace_pid_list_is_set(filtered_pids, search_pid);
582 }
583 
584 /**
585  * trace_ignore_this_task - should a task be ignored for tracing
586  * @filtered_pids: The list of pids to check
587  * @filtered_no_pids: The list of pids not to be traced
588  * @task: The task that should be ignored if not filtered
589  *
590  * Checks if @task should be traced or not from @filtered_pids.
591  * Returns true if @task should *NOT* be traced.
592  * Returns false if @task should be traced.
593  */
594 bool
595 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
596 		       struct trace_pid_list *filtered_no_pids,
597 		       struct task_struct *task)
598 {
599 	/*
600 	 * If filtered_no_pids is not empty, and the task's pid is listed
601 	 * in filtered_no_pids, then return true.
602 	 * Otherwise, if filtered_pids is empty, that means we can
603 	 * trace all tasks. If it has content, then only trace pids
604 	 * within filtered_pids.
605 	 */
606 
607 	return (filtered_pids &&
608 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
609 		(filtered_no_pids &&
610 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
611 }
612 
613 /**
614  * trace_filter_add_remove_task - Add or remove a task from a pid_list
615  * @pid_list: The list to modify
616  * @self: The current task for fork or NULL for exit
617  * @task: The task to add or remove
618  *
619  * When adding a task and @self is defined, the task is only added if @self
620  * is also included in @pid_list. This happens on fork and tasks should
621  * only be added when the parent is listed. If @self is NULL, then the
622  * @task pid will be removed from the list, which would happen on exit
623  * of a task.
624  */
625 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
626 				  struct task_struct *self,
627 				  struct task_struct *task)
628 {
629 	if (!pid_list)
630 		return;
631 
632 	/* For forks, we only add if the forking task is listed */
633 	if (self) {
634 		if (!trace_find_filtered_pid(pid_list, self->pid))
635 			return;
636 	}
637 
638 	/* "self" is set for forks, and NULL for exits */
639 	if (self)
640 		trace_pid_list_set(pid_list, task->pid);
641 	else
642 		trace_pid_list_clear(pid_list, task->pid);
643 }
644 
645 /**
646  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
647  * @pid_list: The pid list to show
648  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
649  * @pos: The position of the file
650  *
651  * This is used by the seq_file "next" operation to iterate the pids
652  * listed in a trace_pid_list structure.
653  *
654  * Returns the pid+1 as we want to display pid of zero, but NULL would
655  * stop the iteration.
656  */
657 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
658 {
659 	long pid = (unsigned long)v;
660 	unsigned int next;
661 
662 	(*pos)++;
663 
664 	/* pid already is +1 of the actual previous bit */
665 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
666 		return NULL;
667 
668 	pid = next;
669 
670 	/* Return pid + 1 to allow zero to be represented */
671 	return (void *)(pid + 1);
672 }
673 
674 /**
675  * trace_pid_start - Used for seq_file to start reading pid lists
676  * @pid_list: The pid list to show
677  * @pos: The position of the file
678  *
679  * This is used by seq_file "start" operation to start the iteration
680  * of listing pids.
681  *
682  * Returns the pid+1 as we want to display pid of zero, but NULL would
683  * stop the iteration.
684  */
685 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
686 {
687 	unsigned long pid;
688 	unsigned int first;
689 	loff_t l = 0;
690 
691 	if (trace_pid_list_first(pid_list, &first) < 0)
692 		return NULL;
693 
694 	pid = first;
695 
696 	/* Return pid + 1 so that zero can be the exit value */
697 	for (pid++; pid && l < *pos;
698 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
699 		;
700 	return (void *)pid;
701 }
702 
703 /**
704  * trace_pid_show - show the current pid in seq_file processing
705  * @m: The seq_file structure to write into
706  * @v: A void pointer of the pid (+1) value to display
707  *
708  * Can be directly used by seq_file operations to display the current
709  * pid value.
710  */
711 int trace_pid_show(struct seq_file *m, void *v)
712 {
713 	unsigned long pid = (unsigned long)v - 1;
714 
715 	seq_printf(m, "%lu\n", pid);
716 	return 0;
717 }
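
/*
 * A minimal sketch of how the three helpers above plug into a seq_file
 * interface (the wrapper names and the way the pid list is obtained are
 * illustrative; real users also handle RCU and locking):
 *
 *	static void *p_start(struct seq_file *m, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_start(pid_list, pos);
 *	}
 *
 *	static void *p_next(struct seq_file *m, void *v, loff_t *pos)
 *	{
 *		struct trace_pid_list *pid_list = m->private;
 *
 *		return trace_pid_next(pid_list, v, pos);
 *	}
 *
 *	static void p_stop(struct seq_file *m, void *v)
 *	{
 *	}
 *
 *	static const struct seq_operations example_pid_sops = {
 *		.start	= p_start,
 *		.next	= p_next,
 *		.stop	= p_stop,
 *		.show	= trace_pid_show,
 *	};
 */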
718 
719 /* 128 should be much more than enough */
720 #define PID_BUF_SIZE		127
721 
722 int trace_pid_write(struct trace_pid_list *filtered_pids,
723 		    struct trace_pid_list **new_pid_list,
724 		    const char __user *ubuf, size_t cnt)
725 {
726 	struct trace_pid_list *pid_list;
727 	struct trace_parser parser;
728 	unsigned long val;
729 	int nr_pids = 0;
730 	ssize_t read = 0;
731 	ssize_t ret;
732 	loff_t pos;
733 	pid_t pid;
734 
735 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
736 		return -ENOMEM;
737 
738 	/*
739 	 * Always create a new list. The write is an all-or-nothing
740 	 * operation: a new list is always built when the user adds
741 	 * new pids, so that if the operation fails, the current
742 	 * list is not modified.
743 	 */
744 	pid_list = trace_pid_list_alloc();
745 	if (!pid_list) {
746 		trace_parser_put(&parser);
747 		return -ENOMEM;
748 	}
749 
750 	if (filtered_pids) {
751 		/* copy the current bits to the new max */
752 		ret = trace_pid_list_first(filtered_pids, &pid);
753 		while (!ret) {
754 			trace_pid_list_set(pid_list, pid);
755 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
756 			nr_pids++;
757 		}
758 	}
759 
760 	ret = 0;
761 	while (cnt > 0) {
762 
763 		pos = 0;
764 
765 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
766 		if (ret < 0)
767 			break;
768 
769 		read += ret;
770 		ubuf += ret;
771 		cnt -= ret;
772 
773 		if (!trace_parser_loaded(&parser))
774 			break;
775 
776 		ret = -EINVAL;
777 		if (kstrtoul(parser.buffer, 0, &val))
778 			break;
779 
780 		pid = (pid_t)val;
781 
782 		if (trace_pid_list_set(pid_list, pid) < 0) {
783 			ret = -1;
784 			break;
785 		}
786 		nr_pids++;
787 
788 		trace_parser_clear(&parser);
789 		ret = 0;
790 	}
791 	trace_parser_put(&parser);
792 
793 	if (ret < 0) {
794 		trace_pid_list_free(pid_list);
795 		return ret;
796 	}
797 
798 	if (!nr_pids) {
799 		/* Cleared the list of pids */
800 		trace_pid_list_free(pid_list);
801 		pid_list = NULL;
802 	}
803 
804 	*new_pid_list = pid_list;
805 
806 	return read;
807 }
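
/*
 * This is the backend for the pid filter files (e.g. "set_event_pid"):
 * user space writes a whitespace-separated list of pids, for example
 *
 *	echo 123 456 > /sys/kernel/tracing/set_event_pid
 *
 * and a write containing no pids (e.g. "echo > set_event_pid") clears
 * the list.
 */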
808 
809 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
810 {
811 	u64 ts;
812 
813 	/* Early boot up does not have a buffer yet */
814 	if (!buf->buffer)
815 		return trace_clock_local();
816 
817 	ts = ring_buffer_time_stamp(buf->buffer);
818 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
819 
820 	return ts;
821 }
822 
823 u64 ftrace_now(int cpu)
824 {
825 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
826 }
827 
828 /**
829  * tracing_is_enabled - Show if global_trace has been enabled
830  *
831  * Shows if the global trace has been enabled or not. It uses the
832  * mirror flag "buffer_disabled" so that it can be used in fast paths,
833  * such as by the irqsoff tracer. But it may be inaccurate due to
834  * races. If you need to know the accurate state, use tracing_is_on(),
835  * which is a little slower, but accurate.
836  */
837 int tracing_is_enabled(void)
838 {
839 	/*
840 	 * For quick access (irqsoff uses this in fast path), just
841 	 * return the mirror variable of the state of the ring buffer.
842 	 * It's a little racy, but we don't really care.
843 	 */
844 	smp_rmb();
845 	return !global_trace.buffer_disabled;
846 }
847 
848 /*
849  * trace_buf_size is the size in bytes that is allocated
850  * for a buffer. Note, the number of bytes is always rounded
851  * to page size.
852  *
853  * This number is purposely set to a low number of 16384.
854  * If a dump on oops happens, it is much appreciated not to have
855  * to wait for all that output. In any case, this can be
856  * configured at both boot time and run time.
857  */
858 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
859 
860 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
861 
862 /* trace_types holds a link list of available tracers. */
863 static struct tracer		*trace_types __read_mostly;
864 
865 /*
866  * trace_types_lock is used to protect the trace_types list.
867  */
868 DEFINE_MUTEX(trace_types_lock);
869 
870 /*
871  * Serialize access to the ring buffer.
872  *
873  * The ring buffer serializes readers, but that is only low-level protection.
874  * The validity of the events (returned by ring_buffer_peek() etc.)
875  * is not protected by the ring buffer.
876  *
877  * The content of events may become garbage if we allow other processes
878  * to consume these events concurrently:
879  *   A) the page of the consumed events may become a normal page
880  *      (not a reader page) in the ring buffer, and this page will be
881  *      rewritten by the events producer.
882  *   B) the page of the consumed events may become a page for splice_read,
883  *      and this page will be returned to the system.
884  *
885  * These primitives allow multiple processes to access different cpu
886  * ring buffers concurrently.
887  *
888  * These primitives don't distinguish read-only and read-consume access.
889  * Multiple read-only accesses are also serialized.
890  */
891 
892 #ifdef CONFIG_SMP
893 static DECLARE_RWSEM(all_cpu_access_lock);
894 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
895 
896 static inline void trace_access_lock(int cpu)
897 {
898 	if (cpu == RING_BUFFER_ALL_CPUS) {
899 		/* gain it for accessing the whole ring buffer. */
900 		down_write(&all_cpu_access_lock);
901 	} else {
902 		/* gain it for accessing a cpu ring buffer. */
903 
904 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
905 		down_read(&all_cpu_access_lock);
906 
907 		/* Secondly block other access to this @cpu ring buffer. */
908 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
909 	}
910 }
911 
912 static inline void trace_access_unlock(int cpu)
913 {
914 	if (cpu == RING_BUFFER_ALL_CPUS) {
915 		up_write(&all_cpu_access_lock);
916 	} else {
917 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
918 		up_read(&all_cpu_access_lock);
919 	}
920 }
921 
922 static inline void trace_access_lock_init(void)
923 {
924 	int cpu;
925 
926 	for_each_possible_cpu(cpu)
927 		mutex_init(&per_cpu(cpu_access_lock, cpu));
928 }
929 
930 #else
931 
932 static DEFINE_MUTEX(access_lock);
933 
934 static inline void trace_access_lock(int cpu)
935 {
936 	(void)cpu;
937 	mutex_lock(&access_lock);
938 }
939 
940 static inline void trace_access_unlock(int cpu)
941 {
942 	(void)cpu;
943 	mutex_unlock(&access_lock);
944 }
945 
946 static inline void trace_access_lock_init(void)
947 {
948 }
949 
950 #endif
951 
952 #ifdef CONFIG_STACKTRACE
953 static void __ftrace_trace_stack(struct trace_buffer *buffer,
954 				 unsigned int trace_ctx,
955 				 int skip, struct pt_regs *regs);
956 static inline void ftrace_trace_stack(struct trace_array *tr,
957 				      struct trace_buffer *buffer,
958 				      unsigned int trace_ctx,
959 				      int skip, struct pt_regs *regs);
960 
961 #else
962 static inline void __ftrace_trace_stack(struct trace_buffer *buffer,
963 					unsigned int trace_ctx,
964 					int skip, struct pt_regs *regs)
965 {
966 }
967 static inline void ftrace_trace_stack(struct trace_array *tr,
968 				      struct trace_buffer *buffer,
969 				      unsigned int trace_ctx,
970 				      int skip, struct pt_regs *regs)
971 {
972 }
973 
974 #endif
975 
976 static __always_inline void
977 trace_event_setup(struct ring_buffer_event *event,
978 		  int type, unsigned int trace_ctx)
979 {
980 	struct trace_entry *ent = ring_buffer_event_data(event);
981 
982 	tracing_generic_entry_update(ent, type, trace_ctx);
983 }
984 
985 static __always_inline struct ring_buffer_event *
986 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
987 			  int type,
988 			  unsigned long len,
989 			  unsigned int trace_ctx)
990 {
991 	struct ring_buffer_event *event;
992 
993 	event = ring_buffer_lock_reserve(buffer, len);
994 	if (event != NULL)
995 		trace_event_setup(event, type, trace_ctx);
996 
997 	return event;
998 }
999 
1000 void tracer_tracing_on(struct trace_array *tr)
1001 {
1002 	if (tr->array_buffer.buffer)
1003 		ring_buffer_record_on(tr->array_buffer.buffer);
1004 	/*
1005 	 * This flag is looked at when buffers haven't been allocated
1006 	 * yet, or by some tracers (like irqsoff), that just want to
1007 	 * know if the ring buffer has been disabled, but it can handle
1008 	 * races where it gets disabled but we still do a record.
1009 	 * As the check is in the fast path of the tracers, it is more
1010 	 * important to be fast than accurate.
1011 	 */
1012 	tr->buffer_disabled = 0;
1013 	/* Make the flag seen by readers */
1014 	smp_wmb();
1015 }
1016 
1017 /**
1018  * tracing_on - enable tracing buffers
1019  *
1020  * This function enables tracing buffers that may have been
1021  * disabled with tracing_off.
1022  */
1023 void tracing_on(void)
1024 {
1025 	tracer_tracing_on(&global_trace);
1026 }
1027 EXPORT_SYMBOL_GPL(tracing_on);
1028 
1029 
1030 static __always_inline void
1031 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1032 {
1033 	__this_cpu_write(trace_taskinfo_save, true);
1034 
1035 	/* If this is the temp buffer, we need to commit fully */
1036 	if (this_cpu_read(trace_buffered_event) == event) {
1037 		/* Length is in event->array[0] */
1038 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1039 		/* Release the temp buffer */
1040 		this_cpu_dec(trace_buffered_event_cnt);
1041 		/* ring_buffer_unlock_commit() enables preemption */
1042 		preempt_enable_notrace();
1043 	} else
1044 		ring_buffer_unlock_commit(buffer);
1045 }
1046 
1047 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1048 		       const char *str, int size)
1049 {
1050 	struct ring_buffer_event *event;
1051 	struct trace_buffer *buffer;
1052 	struct print_entry *entry;
1053 	unsigned int trace_ctx;
1054 	int alloc;
1055 
1056 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1057 		return 0;
1058 
1059 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1060 		return 0;
1061 
1062 	if (unlikely(tracing_disabled))
1063 		return 0;
1064 
1065 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1066 
1067 	trace_ctx = tracing_gen_ctx();
1068 	buffer = tr->array_buffer.buffer;
1069 	ring_buffer_nest_start(buffer);
1070 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1071 					    trace_ctx);
1072 	if (!event) {
1073 		size = 0;
1074 		goto out;
1075 	}
1076 
1077 	entry = ring_buffer_event_data(event);
1078 	entry->ip = ip;
1079 
1080 	memcpy(&entry->buf, str, size);
1081 
1082 	/* Add a newline if necessary */
1083 	if (entry->buf[size - 1] != '\n') {
1084 		entry->buf[size] = '\n';
1085 		entry->buf[size + 1] = '\0';
1086 	} else
1087 		entry->buf[size] = '\0';
1088 
1089 	__buffer_unlock_commit(buffer, event);
1090 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1091  out:
1092 	ring_buffer_nest_end(buffer);
1093 	return size;
1094 }
1095 EXPORT_SYMBOL_GPL(__trace_array_puts);
1096 
1097 /**
1098  * __trace_puts - write a constant string into the trace buffer.
1099  * @ip:	   The address of the caller
1100  * @str:   The constant string to write
1101  * @size:  The size of the string.
1102  */
1103 int __trace_puts(unsigned long ip, const char *str, int size)
1104 {
1105 	return __trace_array_puts(&global_trace, ip, str, size);
1106 }
1107 EXPORT_SYMBOL_GPL(__trace_puts);
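
/*
 * Callers normally use the trace_puts() macro rather than calling this
 * directly; for build-time constant strings the macro uses
 * __trace_bputs() below, otherwise it falls back to __trace_puts().
 * For example:
 *
 *	trace_puts("reached the interesting spot\n");
 */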
1108 
1109 /**
1110  * __trace_bputs - write the pointer to a constant string into trace buffer
1111  * @ip:	   The address of the caller
1112  * @str:   The constant string to write to the buffer to
1113  */
1114 int __trace_bputs(unsigned long ip, const char *str)
1115 {
1116 	struct ring_buffer_event *event;
1117 	struct trace_buffer *buffer;
1118 	struct bputs_entry *entry;
1119 	unsigned int trace_ctx;
1120 	int size = sizeof(struct bputs_entry);
1121 	int ret = 0;
1122 
1123 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
1124 		return 0;
1125 
1126 	if (unlikely(tracing_selftest_running || tracing_disabled))
1127 		return 0;
1128 
1129 	trace_ctx = tracing_gen_ctx();
1130 	buffer = global_trace.array_buffer.buffer;
1131 
1132 	ring_buffer_nest_start(buffer);
1133 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1134 					    trace_ctx);
1135 	if (!event)
1136 		goto out;
1137 
1138 	entry = ring_buffer_event_data(event);
1139 	entry->ip			= ip;
1140 	entry->str			= str;
1141 
1142 	__buffer_unlock_commit(buffer, event);
1143 	ftrace_trace_stack(&global_trace, buffer, trace_ctx, 4, NULL);
1144 
1145 	ret = 1;
1146  out:
1147 	ring_buffer_nest_end(buffer);
1148 	return ret;
1149 }
1150 EXPORT_SYMBOL_GPL(__trace_bputs);
1151 
1152 #ifdef CONFIG_TRACER_SNAPSHOT
1153 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1154 					   void *cond_data)
1155 {
1156 	struct tracer *tracer = tr->current_trace;
1157 	unsigned long flags;
1158 
1159 	if (in_nmi()) {
1160 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1161 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1162 		return;
1163 	}
1164 
1165 	if (!tr->allocated_snapshot) {
1166 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1167 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1168 		tracer_tracing_off(tr);
1169 		return;
1170 	}
1171 
1172 	/* Note, snapshot can not be used when the tracer uses it */
1173 	if (tracer->use_max_tr) {
1174 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1175 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1176 		return;
1177 	}
1178 
1179 	local_irq_save(flags);
1180 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1181 	local_irq_restore(flags);
1182 }
1183 
1184 void tracing_snapshot_instance(struct trace_array *tr)
1185 {
1186 	tracing_snapshot_instance_cond(tr, NULL);
1187 }
1188 
1189 /**
1190  * tracing_snapshot - take a snapshot of the current buffer.
1191  *
1192  * This causes a swap between the snapshot buffer and the current live
1193  * tracing buffer. You can use this to take snapshots of the live
1194  * trace when some condition is triggered, but continue to trace.
1195  *
1196  * Note, make sure to allocate the snapshot with either
1197  * a tracing_snapshot_alloc(), or by doing it manually
1198  * with: echo 1 > /sys/kernel/tracing/snapshot
1199  *
1200  * If the snapshot buffer is not allocated, it will stop tracing.
1201  * Basically making a permanent snapshot.
1202  */
1203 void tracing_snapshot(void)
1204 {
1205 	struct trace_array *tr = &global_trace;
1206 
1207 	tracing_snapshot_instance(tr);
1208 }
1209 EXPORT_SYMBOL_GPL(tracing_snapshot);
1210 
1211 /**
1212  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1213  * @tr:		The tracing instance to snapshot
1214  * @cond_data:	The data to be tested conditionally, and possibly saved
1215  *
1216  * This is the same as tracing_snapshot() except that the snapshot is
1217  * conditional - the snapshot will only happen if the
1218  * cond_snapshot.update() implementation receiving the cond_data
1219  * returns true, which means that the trace array's cond_snapshot
1220  * update() operation used the cond_data to determine whether the
1221  * snapshot should be taken, and if it was, presumably saved it along
1222  * with the snapshot.
1223  */
1224 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1225 {
1226 	tracing_snapshot_instance_cond(tr, cond_data);
1227 }
1228 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1229 
1230 /**
1231  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1232  * @tr:		The tracing instance
1233  *
1234  * When the user enables a conditional snapshot using
1235  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1236  * with the snapshot.  This accessor is used to retrieve it.
1237  *
1238  * Should not be called from cond_snapshot.update(), since it takes
1239  * the tr->max_lock lock, which the code calling
1240  * cond_snapshot.update() has already done.
1241  *
1242  * Returns the cond_data associated with the trace array's snapshot.
1243  */
1244 void *tracing_cond_snapshot_data(struct trace_array *tr)
1245 {
1246 	void *cond_data = NULL;
1247 
1248 	local_irq_disable();
1249 	arch_spin_lock(&tr->max_lock);
1250 
1251 	if (tr->cond_snapshot)
1252 		cond_data = tr->cond_snapshot->cond_data;
1253 
1254 	arch_spin_unlock(&tr->max_lock);
1255 	local_irq_enable();
1256 
1257 	return cond_data;
1258 }
1259 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1260 
1261 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1262 					struct array_buffer *size_buf, int cpu_id);
1263 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1264 
1265 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1266 {
1267 	int order;
1268 	int ret;
1269 
1270 	if (!tr->allocated_snapshot) {
1271 
1272 		/* Make the snapshot buffer have the same order as main buffer */
1273 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1274 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1275 		if (ret < 0)
1276 			return ret;
1277 
1278 		/* allocate spare buffer */
1279 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1280 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1281 		if (ret < 0)
1282 			return ret;
1283 
1284 		tr->allocated_snapshot = true;
1285 	}
1286 
1287 	return 0;
1288 }
1289 
1290 static void free_snapshot(struct trace_array *tr)
1291 {
1292 	/*
1293 	 * We don't free the ring buffer; instead, we resize it because
1294 	 * the max_tr ring buffer has some state (e.g. ring->clock) and
1295 	 * we want to preserve it.
1296 	 */
1297 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1298 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1299 	set_buffer_entries(&tr->max_buffer, 1);
1300 	tracing_reset_online_cpus(&tr->max_buffer);
1301 	tr->allocated_snapshot = false;
1302 }
1303 
1304 /**
1305  * tracing_alloc_snapshot - allocate snapshot buffer.
1306  *
1307  * This only allocates the snapshot buffer if it isn't already
1308  * allocated - it doesn't also take a snapshot.
1309  *
1310  * This is meant to be used in cases where the snapshot buffer needs
1311  * to be set up for events that can't sleep but need to be able to
1312  * trigger a snapshot.
1313  */
1314 int tracing_alloc_snapshot(void)
1315 {
1316 	struct trace_array *tr = &global_trace;
1317 	int ret;
1318 
1319 	ret = tracing_alloc_snapshot_instance(tr);
1320 	WARN_ON(ret < 0);
1321 
1322 	return ret;
1323 }
1324 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1325 
1326 /**
1327  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1328  *
1329  * This is similar to tracing_snapshot(), but it will allocate the
1330  * snapshot buffer if it isn't already allocated. Use this only
1331  * where it is safe to sleep, as the allocation may sleep.
1332  *
1333  * This causes a swap between the snapshot buffer and the current live
1334  * tracing buffer. You can use this to take snapshots of the live
1335  * trace when some condition is triggered, but continue to trace.
1336  */
1337 void tracing_snapshot_alloc(void)
1338 {
1339 	int ret;
1340 
1341 	ret = tracing_alloc_snapshot();
1342 	if (ret < 0)
1343 		return;
1344 
1345 	tracing_snapshot();
1346 }
1347 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1348 
1349 /**
1350  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1351  * @tr:		The tracing instance
1352  * @cond_data:	User data to associate with the snapshot
1353  * @update:	Implementation of the cond_snapshot update function
1354  *
1355  * Check whether the conditional snapshot for the given instance has
1356  * already been enabled, or if the current tracer is already using a
1357  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1358  * save the cond_data and update function inside.
1359  *
1360  * Returns 0 if successful, error otherwise.
1361  */
1362 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1363 				 cond_update_fn_t update)
1364 {
1365 	struct cond_snapshot *cond_snapshot;
1366 	int ret = 0;
1367 
1368 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1369 	if (!cond_snapshot)
1370 		return -ENOMEM;
1371 
1372 	cond_snapshot->cond_data = cond_data;
1373 	cond_snapshot->update = update;
1374 
1375 	mutex_lock(&trace_types_lock);
1376 
1377 	ret = tracing_alloc_snapshot_instance(tr);
1378 	if (ret)
1379 		goto fail_unlock;
1380 
1381 	if (tr->current_trace->use_max_tr) {
1382 		ret = -EBUSY;
1383 		goto fail_unlock;
1384 	}
1385 
1386 	/*
1387 	 * The cond_snapshot can only change to NULL without the
1388 	 * trace_types_lock. We don't care if we race with it going
1389 	 * to NULL, but we want to make sure that it's not set to
1390 	 * something other than NULL when we get here, which we can
1391 	 * do safely with only holding the trace_types_lock and not
1392 	 * having to take the max_lock.
1393 	 */
1394 	if (tr->cond_snapshot) {
1395 		ret = -EBUSY;
1396 		goto fail_unlock;
1397 	}
1398 
1399 	local_irq_disable();
1400 	arch_spin_lock(&tr->max_lock);
1401 	tr->cond_snapshot = cond_snapshot;
1402 	arch_spin_unlock(&tr->max_lock);
1403 	local_irq_enable();
1404 
1405 	mutex_unlock(&trace_types_lock);
1406 
1407 	return ret;
1408 
1409  fail_unlock:
1410 	mutex_unlock(&trace_types_lock);
1411 	kfree(cond_snapshot);
1412 	return ret;
1413 }
1414 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
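
/*
 * A minimal sketch of a conditional snapshot user (the callback, the
 * threshold and the measured value are illustrative only):
 *
 *	static u64 example_threshold = 1000000;
 *
 *	static bool example_update(struct trace_array *tr, void *cond_data)
 *	{
 *		// cond_data is the value passed to tracing_snapshot_cond()
 *		u64 *measured = cond_data;
 *
 *		return *measured > example_threshold;
 *	}
 *
 *	// setup:
 *	tracing_snapshot_cond_enable(tr, &example_threshold, example_update);
 *
 *	// hot path, once a measurement is available:
 *	tracing_snapshot_cond(tr, &measured_value);
 */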
1415 
1416 /**
1417  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1418  * @tr:		The tracing instance
1419  *
1420  * Check whether the conditional snapshot for the given instance is
1421  * enabled; if so, free the cond_snapshot associated with it,
1422  * otherwise return -EINVAL.
1423  *
1424  * Returns 0 if successful, error otherwise.
1425  */
1426 int tracing_snapshot_cond_disable(struct trace_array *tr)
1427 {
1428 	int ret = 0;
1429 
1430 	local_irq_disable();
1431 	arch_spin_lock(&tr->max_lock);
1432 
1433 	if (!tr->cond_snapshot)
1434 		ret = -EINVAL;
1435 	else {
1436 		kfree(tr->cond_snapshot);
1437 		tr->cond_snapshot = NULL;
1438 	}
1439 
1440 	arch_spin_unlock(&tr->max_lock);
1441 	local_irq_enable();
1442 
1443 	return ret;
1444 }
1445 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1446 #else
1447 void tracing_snapshot(void)
1448 {
1449 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1450 }
1451 EXPORT_SYMBOL_GPL(tracing_snapshot);
1452 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1453 {
1454 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1455 }
1456 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1457 int tracing_alloc_snapshot(void)
1458 {
1459 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1460 	return -ENODEV;
1461 }
1462 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1463 void tracing_snapshot_alloc(void)
1464 {
1465 	/* Give warning */
1466 	tracing_snapshot();
1467 }
1468 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1469 void *tracing_cond_snapshot_data(struct trace_array *tr)
1470 {
1471 	return NULL;
1472 }
1473 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1474 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1475 {
1476 	return -ENODEV;
1477 }
1478 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1479 int tracing_snapshot_cond_disable(struct trace_array *tr)
1480 {
1481 	return false;
1482 }
1483 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1484 #define free_snapshot(tr)	do { } while (0)
1485 #endif /* CONFIG_TRACER_SNAPSHOT */
1486 
1487 void tracer_tracing_off(struct trace_array *tr)
1488 {
1489 	if (tr->array_buffer.buffer)
1490 		ring_buffer_record_off(tr->array_buffer.buffer);
1491 	/*
1492 	 * This flag is looked at when buffers haven't been allocated
1493 	 * yet, or by some tracers (like irqsoff), that just want to
1494 	 * know if the ring buffer has been disabled, but it can handle
1495 	 * races where it gets disabled but we still do a record.
1496 	 * As the check is in the fast path of the tracers, it is more
1497 	 * important to be fast than accurate.
1498 	 */
1499 	tr->buffer_disabled = 1;
1500 	/* Make the flag seen by readers */
1501 	smp_wmb();
1502 }
1503 
1504 /**
1505  * tracing_off - turn off tracing buffers
1506  *
1507  * This function stops the tracing buffers from recording data.
1508  * It does not disable any overhead the tracers themselves may
1509  * be causing. This function simply causes all recording to
1510  * the ring buffers to fail.
1511  */
1512 void tracing_off(void)
1513 {
1514 	tracer_tracing_off(&global_trace);
1515 }
1516 EXPORT_SYMBOL_GPL(tracing_off);
1517 
1518 void disable_trace_on_warning(void)
1519 {
1520 	if (__disable_trace_on_warning) {
1521 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1522 			"Disabling tracing due to warning\n");
1523 		tracing_off();
1524 	}
1525 }
1526 
1527 /**
1528  * tracer_tracing_is_on - show real state of ring buffer enabled
1529  * @tr : the trace array to know if ring buffer is enabled
1530  *
1531  * Shows real state of the ring buffer if it is enabled or not.
1532  */
1533 bool tracer_tracing_is_on(struct trace_array *tr)
1534 {
1535 	if (tr->array_buffer.buffer)
1536 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1537 	return !tr->buffer_disabled;
1538 }
1539 
1540 /**
1541  * tracing_is_on - show state of ring buffers enabled
1542  */
1543 int tracing_is_on(void)
1544 {
1545 	return tracer_tracing_is_on(&global_trace);
1546 }
1547 EXPORT_SYMBOL_GPL(tracing_is_on);
1548 
1549 static int __init set_buf_size(char *str)
1550 {
1551 	unsigned long buf_size;
1552 
1553 	if (!str)
1554 		return 0;
1555 	buf_size = memparse(str, &str);
1556 	/*
1557 	 * nr_entries can not be zero and the startup
1558 	 * tests require some buffer space. Therefore
1559 	 * ensure we have at least 4096 bytes of buffer.
1560 	 */
1561 	trace_buf_size = max(4096UL, buf_size);
1562 	return 1;
1563 }
1564 __setup("trace_buf_size=", set_buf_size);
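
/*
 * Example usage: the per-cpu buffer size can be set on the kernel
 * command line with a memparse() style suffix, e.g.
 *
 *	trace_buf_size=10M
 *
 * which requests roughly 10 megabytes per cpu (rounded as described
 * above).
 */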
1565 
1566 static int __init set_tracing_thresh(char *str)
1567 {
1568 	unsigned long threshold;
1569 	int ret;
1570 
1571 	if (!str)
1572 		return 0;
1573 	ret = kstrtoul(str, 0, &threshold);
1574 	if (ret < 0)
1575 		return 0;
1576 	tracing_thresh = threshold * 1000;
1577 	return 1;
1578 }
1579 __setup("tracing_thresh=", set_tracing_thresh);
1580 
1581 unsigned long nsecs_to_usecs(unsigned long nsecs)
1582 {
1583 	return nsecs / 1000;
1584 }
1585 
1586 /*
1587  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1588  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1589  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1590  * of strings in the order that the evals (enum) were defined.
1591  */
1592 #undef C
1593 #define C(a, b) b
1594 
1595 /* These must match the bit positions in trace_iterator_flags */
1596 static const char *trace_options[] = {
1597 	TRACE_FLAGS
1598 	NULL
1599 };
1600 
1601 static struct {
1602 	u64 (*func)(void);
1603 	const char *name;
1604 	int in_ns;		/* is this clock in nanoseconds? */
1605 } trace_clocks[] = {
1606 	{ trace_clock_local,		"local",	1 },
1607 	{ trace_clock_global,		"global",	1 },
1608 	{ trace_clock_counter,		"counter",	0 },
1609 	{ trace_clock_jiffies,		"uptime",	0 },
1610 	{ trace_clock,			"perf",		1 },
1611 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1612 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1613 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1614 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1615 	ARCH_TRACE_CLOCKS
1616 };
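
/*
 * The clock used by an instance is selected by name through the tracefs
 * "trace_clock" file (or the "trace_clock=" boot parameter above), e.g.
 *
 *	echo mono > /sys/kernel/tracing/trace_clock
 *
 * Clocks with in_ns == 0 ("counter" and "uptime") do not report
 * timestamps in nanoseconds, which affects how they are printed.
 */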
1617 
1618 bool trace_clock_in_ns(struct trace_array *tr)
1619 {
1620 	if (trace_clocks[tr->clock_id].in_ns)
1621 		return true;
1622 
1623 	return false;
1624 }
1625 
1626 /*
1627  * trace_parser_get_init - gets the buffer for trace parser
1628  */
1629 int trace_parser_get_init(struct trace_parser *parser, int size)
1630 {
1631 	memset(parser, 0, sizeof(*parser));
1632 
1633 	parser->buffer = kmalloc(size, GFP_KERNEL);
1634 	if (!parser->buffer)
1635 		return 1;
1636 
1637 	parser->size = size;
1638 	return 0;
1639 }
1640 
1641 /*
1642  * trace_parser_put - frees the buffer for trace parser
1643  */
1644 void trace_parser_put(struct trace_parser *parser)
1645 {
1646 	kfree(parser->buffer);
1647 	parser->buffer = NULL;
1648 }
1649 
1650 /*
1651  * trace_get_user - reads the user input string separated by space
1652  * (matched by isspace(ch))
1653  *
1654  * For each string found the 'struct trace_parser' is updated,
1655  * and the function returns.
1656  *
1657  * Returns number of bytes read.
1658  *
1659  * See kernel/trace/trace.h for 'struct trace_parser' details.
1660  */
1661 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1662 	size_t cnt, loff_t *ppos)
1663 {
1664 	char ch;
1665 	size_t read = 0;
1666 	ssize_t ret;
1667 
1668 	if (!*ppos)
1669 		trace_parser_clear(parser);
1670 
1671 	ret = get_user(ch, ubuf++);
1672 	if (ret)
1673 		goto out;
1674 
1675 	read++;
1676 	cnt--;
1677 
1678 	/*
1679 	 * If the parser is not finished with the last write,
1680 	 * continue reading the user input without skipping spaces.
1681 	 */
1682 	if (!parser->cont) {
1683 		/* skip white space */
1684 		while (cnt && isspace(ch)) {
1685 			ret = get_user(ch, ubuf++);
1686 			if (ret)
1687 				goto out;
1688 			read++;
1689 			cnt--;
1690 		}
1691 
1692 		parser->idx = 0;
1693 
1694 		/* only spaces were written */
1695 		if (isspace(ch) || !ch) {
1696 			*ppos += read;
1697 			ret = read;
1698 			goto out;
1699 		}
1700 	}
1701 
1702 	/* read the non-space input */
1703 	while (cnt && !isspace(ch) && ch) {
1704 		if (parser->idx < parser->size - 1)
1705 			parser->buffer[parser->idx++] = ch;
1706 		else {
1707 			ret = -EINVAL;
1708 			goto out;
1709 		}
1710 		ret = get_user(ch, ubuf++);
1711 		if (ret)
1712 			goto out;
1713 		read++;
1714 		cnt--;
1715 	}
1716 
1717 	/* We either got finished input or we have to wait for another call. */
1718 	if (isspace(ch) || !ch) {
1719 		parser->buffer[parser->idx] = 0;
1720 		parser->cont = false;
1721 	} else if (parser->idx < parser->size - 1) {
1722 		parser->cont = true;
1723 		parser->buffer[parser->idx++] = ch;
1724 		/* Make sure the parsed string always terminates with '\0'. */
1725 		parser->buffer[parser->idx] = 0;
1726 	} else {
1727 		ret = -EINVAL;
1728 		goto out;
1729 	}
1730 
1731 	*ppos += read;
1732 	ret = read;
1733 
1734 out:
1735 	return ret;
1736 }
1737 
1738 /* TODO add a seq_buf_to_buffer() */
1739 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1740 {
1741 	int len;
1742 
1743 	if (trace_seq_used(s) <= s->readpos)
1744 		return -EBUSY;
1745 
1746 	len = trace_seq_used(s) - s->readpos;
1747 	if (cnt > len)
1748 		cnt = len;
1749 	memcpy(buf, s->buffer + s->readpos, cnt);
1750 
1751 	s->readpos += cnt;
1752 	return cnt;
1753 }
1754 
1755 unsigned long __read_mostly	tracing_thresh;
1756 
1757 #ifdef CONFIG_TRACER_MAX_TRACE
1758 static const struct file_operations tracing_max_lat_fops;
1759 
1760 #ifdef LATENCY_FS_NOTIFY
1761 
1762 static struct workqueue_struct *fsnotify_wq;
1763 
1764 static void latency_fsnotify_workfn(struct work_struct *work)
1765 {
1766 	struct trace_array *tr = container_of(work, struct trace_array,
1767 					      fsnotify_work);
1768 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1769 }
1770 
1771 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1772 {
1773 	struct trace_array *tr = container_of(iwork, struct trace_array,
1774 					      fsnotify_irqwork);
1775 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1776 }
1777 
1778 static void trace_create_maxlat_file(struct trace_array *tr,
1779 				     struct dentry *d_tracer)
1780 {
1781 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1782 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1783 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1784 					      TRACE_MODE_WRITE,
1785 					      d_tracer, tr,
1786 					      &tracing_max_lat_fops);
1787 }
1788 
1789 __init static int latency_fsnotify_init(void)
1790 {
1791 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1792 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1793 	if (!fsnotify_wq) {
1794 		pr_err("Unable to allocate tr_max_lat_wq\n");
1795 		return -ENOMEM;
1796 	}
1797 	return 0;
1798 }
1799 
1800 late_initcall_sync(latency_fsnotify_init);
1801 
1802 void latency_fsnotify(struct trace_array *tr)
1803 {
1804 	if (!fsnotify_wq)
1805 		return;
1806 	/*
1807 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1808 	 * possible that we are called from __schedule() or do_idle(), which
1809 	 * could cause a deadlock.
1810 	 */
1811 	irq_work_queue(&tr->fsnotify_irqwork);
1812 }
1813 
1814 #else /* !LATENCY_FS_NOTIFY */
1815 
1816 #define trace_create_maxlat_file(tr, d_tracer)				\
1817 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1818 			  d_tracer, tr, &tracing_max_lat_fops)
1819 
1820 #endif
1821 
1822 /*
1823  * Copy the new maximum trace into the separate maximum-trace
1824  * structure. (this way the maximum trace is permanently saved,
1825  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1826  */
1827 static void
1828 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1829 {
1830 	struct array_buffer *trace_buf = &tr->array_buffer;
1831 	struct array_buffer *max_buf = &tr->max_buffer;
1832 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1833 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1834 
1835 	max_buf->cpu = cpu;
1836 	max_buf->time_start = data->preempt_timestamp;
1837 
1838 	max_data->saved_latency = tr->max_latency;
1839 	max_data->critical_start = data->critical_start;
1840 	max_data->critical_end = data->critical_end;
1841 
1842 	strncpy(max_data->comm, tsk->comm, TASK_COMM_LEN);
1843 	max_data->pid = tsk->pid;
1844 	/*
1845 	 * If tsk == current, then use current_uid(), as that does not use
1846 	 * RCU. The irq tracer can be called out of RCU scope.
1847 	 */
1848 	if (tsk == current)
1849 		max_data->uid = current_uid();
1850 	else
1851 		max_data->uid = task_uid(tsk);
1852 
1853 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1854 	max_data->policy = tsk->policy;
1855 	max_data->rt_priority = tsk->rt_priority;
1856 
1857 	/* record this task's comm */
1858 	tracing_record_cmdline(tsk);
1859 	latency_fsnotify(tr);
1860 }
1861 
1862 /**
1863  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1864  * @tr: tracer
1865  * @tsk: the task with the latency
1866  * @cpu: The cpu that initiated the trace.
1867  * @cond_data: User data associated with a conditional snapshot
1868  *
1869  * Flip the buffers between the @tr and the max_tr and record information
1870  * about which task was the cause of this latency.
1871  */
1872 void
1873 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1874 	      void *cond_data)
1875 {
1876 	if (tr->stop_count)
1877 		return;
1878 
1879 	WARN_ON_ONCE(!irqs_disabled());
1880 
1881 	if (!tr->allocated_snapshot) {
1882 		/* Only the nop tracer should hit this when disabling */
1883 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1884 		return;
1885 	}
1886 
1887 	arch_spin_lock(&tr->max_lock);
1888 
1889 	/* Inherit the recordable setting from array_buffer */
1890 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1891 		ring_buffer_record_on(tr->max_buffer.buffer);
1892 	else
1893 		ring_buffer_record_off(tr->max_buffer.buffer);
1894 
1895 #ifdef CONFIG_TRACER_SNAPSHOT
1896 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1897 		arch_spin_unlock(&tr->max_lock);
1898 		return;
1899 	}
1900 #endif
1901 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1902 
1903 	__update_max_tr(tr, tsk, cpu);
1904 
1905 	arch_spin_unlock(&tr->max_lock);
1906 
1907 	/* Any waiters on the old snapshot buffer need to wake up */
1908 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1909 }
1910 
1911 /**
1912  * update_max_tr_single - only copy one trace over, and reset the rest
1913  * @tr: tracer
1914  * @tsk: task with the latency
1915  * @cpu: the cpu of the buffer to copy.
1916  *
1917  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
1918  */
1919 void
1920 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
1921 {
1922 	int ret;
1923 
1924 	if (tr->stop_count)
1925 		return;
1926 
1927 	WARN_ON_ONCE(!irqs_disabled());
1928 	if (!tr->allocated_snapshot) {
1929 		/* Only the nop tracer should hit this when disabling */
1930 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1931 		return;
1932 	}
1933 
1934 	arch_spin_lock(&tr->max_lock);
1935 
1936 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
1937 
1938 	if (ret == -EBUSY) {
1939 		/*
1940 		 * We failed to swap the buffer due to a commit taking
1941 		 * place on this CPU (or because a buffer resize is in
1942 		 * progress). We fail to record, but we reset the max
1943 		 * trace buffer (no one writes directly to it) and flag
1944 		 * that it failed.
1945 		 */
1946 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
1947 			"Failed to swap buffers due to commit or resize in progress\n");
1948 	}
1949 
1950 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
1951 
1952 	__update_max_tr(tr, tsk, cpu);
1953 	arch_spin_unlock(&tr->max_lock);
1954 }
1955 
1956 #endif /* CONFIG_TRACER_MAX_TRACE */
1957 
1958 struct pipe_wait {
1959 	struct trace_iterator		*iter;
1960 	int				wait_index;
1961 };
1962 
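/*
 * Extra wake-up condition handed to ring_buffer_wait(): report true (stop
 * waiting) once the wait_index has been bumped by a writer or the pipe
 * file has been closed.
 */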
1963 static bool wait_pipe_cond(void *data)
1964 {
1965 	struct pipe_wait *pwait = data;
1966 	struct trace_iterator *iter = pwait->iter;
1967 
1968 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
1969 		return true;
1970 
1971 	return iter->closed;
1972 }
1973 
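/*
 * Block until data is available on the iterator's per-CPU buffer (or until
 * the wait is aborted by wait_pipe_cond() above).
 */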
1974 static int wait_on_pipe(struct trace_iterator *iter, int full)
1975 {
1976 	struct pipe_wait pwait;
1977 	int ret;
1978 
1979 	/* Iterators are static; they should be either filled or empty */
1980 	if (trace_buffer_iter(iter, iter->cpu_file))
1981 		return 0;
1982 
1983 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
1984 	pwait.iter = iter;
1985 
1986 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
1987 			       wait_pipe_cond, &pwait);
1988 
1989 #ifdef CONFIG_TRACER_MAX_TRACE
1990 	/*
1991 	 * Make sure this is still the snapshot buffer: if a snapshot had
1992 	 * happened, this would now be the main buffer.
1993 	 */
1994 	if (iter->snapshot)
1995 		iter->array_buffer = &iter->tr->max_buffer;
1996 #endif
1997 	return ret;
1998 }
1999 
2000 #ifdef CONFIG_FTRACE_STARTUP_TEST
2001 static bool selftests_can_run;
2002 
2003 struct trace_selftests {
2004 	struct list_head		list;
2005 	struct tracer			*type;
2006 };
2007 
2008 static LIST_HEAD(postponed_selftests);
2009 
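/*
 * Remember a tracer whose selftest must be postponed until
 * init_trace_selftests() runs later in boot.
 */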
2010 static int save_selftest(struct tracer *type)
2011 {
2012 	struct trace_selftests *selftest;
2013 
2014 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2015 	if (!selftest)
2016 		return -ENOMEM;
2017 
2018 	selftest->type = type;
2019 	list_add(&selftest->list, &postponed_selftests);
2020 	return 0;
2021 }
2022 
2023 static int run_tracer_selftest(struct tracer *type)
2024 {
2025 	struct trace_array *tr = &global_trace;
2026 	struct tracer *saved_tracer = tr->current_trace;
2027 	int ret;
2028 
2029 	if (!type->selftest || tracing_selftest_disabled)
2030 		return 0;
2031 
2032 	/*
2033 	 * If a tracer registers early in boot up (before scheduling is
2034 	 * initialized and such), then do not run its selftests yet.
2035 	 * Instead, run them a little later in the boot process.
2036 	 */
2037 	if (!selftests_can_run)
2038 		return save_selftest(type);
2039 
2040 	if (!tracing_is_on()) {
2041 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2042 			type->name);
2043 		return 0;
2044 	}
2045 
2046 	/*
2047 	 * Run a selftest on this tracer.
2048 	 * Here we reset the trace buffer, and set the current
2049 	 * tracer to be this tracer. The tracer can then run some
2050 	 * internal tracing to verify that everything is in order.
2051 	 * If we fail, we do not register this tracer.
2052 	 */
2053 	tracing_reset_online_cpus(&tr->array_buffer);
2054 
2055 	tr->current_trace = type;
2056 
2057 #ifdef CONFIG_TRACER_MAX_TRACE
2058 	if (type->use_max_tr) {
2059 		/* If we expanded the buffers, make sure the max is expanded too */
2060 		if (tr->ring_buffer_expanded)
2061 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2062 					   RING_BUFFER_ALL_CPUS);
2063 		tr->allocated_snapshot = true;
2064 	}
2065 #endif
2066 
2067 	/* the test is responsible for initializing and enabling */
2068 	pr_info("Testing tracer %s: ", type->name);
2069 	ret = type->selftest(type, tr);
2070 	/* the test is responsible for resetting too */
2071 	tr->current_trace = saved_tracer;
2072 	if (ret) {
2073 		printk(KERN_CONT "FAILED!\n");
2074 		/* Add the warning after printing 'FAILED' */
2075 		WARN_ON(1);
2076 		return -1;
2077 	}
2078 	/* Only reset on passing, to avoid touching corrupted buffers */
2079 	tracing_reset_online_cpus(&tr->array_buffer);
2080 
2081 #ifdef CONFIG_TRACER_MAX_TRACE
2082 	if (type->use_max_tr) {
2083 		tr->allocated_snapshot = false;
2084 
2085 		/* Shrink the max buffer again */
2086 		if (tr->ring_buffer_expanded)
2087 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2088 					   RING_BUFFER_ALL_CPUS);
2089 	}
2090 #endif
2091 
2092 	printk(KERN_CONT "PASSED\n");
2093 	return 0;
2094 }
2095 
2096 static int do_run_tracer_selftest(struct tracer *type)
2097 {
2098 	int ret;
2099 
2100 	/*
2101 	 * Tests can take a long time, especially if they are run one after the
2102 	 * other, as does happen during bootup when all the tracers are
2103 	 * registered. This could cause the soft lockup watchdog to trigger.
2104 	 */
2105 	cond_resched();
2106 
2107 	tracing_selftest_running = true;
2108 	ret = run_tracer_selftest(type);
2109 	tracing_selftest_running = false;
2110 
2111 	return ret;
2112 }
2113 
2114 static __init int init_trace_selftests(void)
2115 {
2116 	struct trace_selftests *p, *n;
2117 	struct tracer *t, **last;
2118 	int ret;
2119 
2120 	selftests_can_run = true;
2121 
2122 	mutex_lock(&trace_types_lock);
2123 
2124 	if (list_empty(&postponed_selftests))
2125 		goto out;
2126 
2127 	pr_info("Running postponed tracer tests:\n");
2128 
2129 	tracing_selftest_running = true;
2130 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2131 		/* This loop can take minutes when sanitizers are enabled, so
2132 		 * let's make sure we allow RCU processing.
2133 		 */
2134 		cond_resched();
2135 		ret = run_tracer_selftest(p->type);
2136 		/* If the test fails, then warn and remove from available_tracers */
2137 		if (ret < 0) {
2138 			WARN(1, "tracer: %s failed selftest, disabling\n",
2139 			     p->type->name);
2140 			last = &trace_types;
2141 			for (t = trace_types; t; t = t->next) {
2142 				if (t == p->type) {
2143 					*last = t->next;
2144 					break;
2145 				}
2146 				last = &t->next;
2147 			}
2148 		}
2149 		list_del(&p->list);
2150 		kfree(p);
2151 	}
2152 	tracing_selftest_running = false;
2153 
2154  out:
2155 	mutex_unlock(&trace_types_lock);
2156 
2157 	return 0;
2158 }
2159 core_initcall(init_trace_selftests);
2160 #else
2161 static inline int run_tracer_selftest(struct tracer *type)
2162 {
2163 	return 0;
2164 }
2165 static inline int do_run_tracer_selftest(struct tracer *type)
2166 {
2167 	return 0;
2168 }
2169 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2170 
2171 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2172 
2173 static void __init apply_trace_boot_options(void);
2174 
2175 /**
2176  * register_tracer - register a tracer with the ftrace system.
2177  * @type: the plugin for the tracer
2178  *
2179  * Register a new plugin tracer.
2180  */
2181 int __init register_tracer(struct tracer *type)
2182 {
2183 	struct tracer *t;
2184 	int ret = 0;
2185 
2186 	if (!type->name) {
2187 		pr_info("Tracer must have a name\n");
2188 		return -1;
2189 	}
2190 
2191 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2192 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2193 		return -1;
2194 	}
2195 
2196 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2197 		pr_warn("Can not register tracer %s due to lockdown\n",
2198 			   type->name);
2199 		return -EPERM;
2200 	}
2201 
2202 	mutex_lock(&trace_types_lock);
2203 
2204 	for (t = trace_types; t; t = t->next) {
2205 		if (strcmp(type->name, t->name) == 0) {
2206 			/* already found */
2207 			pr_info("Tracer %s already registered\n",
2208 				type->name);
2209 			ret = -1;
2210 			goto out;
2211 		}
2212 	}
2213 
2214 	if (!type->set_flag)
2215 		type->set_flag = &dummy_set_flag;
2216 	if (!type->flags) {
2217 		/* allocate a dummy tracer_flags */
2218 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2219 		if (!type->flags) {
2220 			ret = -ENOMEM;
2221 			goto out;
2222 		}
2223 		type->flags->val = 0;
2224 		type->flags->opts = dummy_tracer_opt;
2225 	} else
2226 		if (!type->flags->opts)
2227 			type->flags->opts = dummy_tracer_opt;
2228 
2229 	/* store the tracer for __set_tracer_option */
2230 	type->flags->trace = type;
2231 
2232 	ret = do_run_tracer_selftest(type);
2233 	if (ret < 0)
2234 		goto out;
2235 
2236 	type->next = trace_types;
2237 	trace_types = type;
2238 	add_tracer_options(&global_trace, type);
2239 
2240  out:
2241 	mutex_unlock(&trace_types_lock);
2242 
2243 	if (ret || !default_bootup_tracer)
2244 		goto out_unlock;
2245 
2246 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2247 		goto out_unlock;
2248 
2249 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2250 	/* Do we want this tracer to start on bootup? */
2251 	tracing_set_tracer(&global_trace, type->name);
2252 	default_bootup_tracer = NULL;
2253 
2254 	apply_trace_boot_options();
2255 
2256 	/* disable other selftests, since running this tracer will break them. */
2257 	disable_tracing_selftest("running a tracer");
2258 
2259  out_unlock:
2260 	return ret;
2261 }
2262 
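/*
 * Clear the ring buffer of a single CPU. Recording is disabled and all
 * pending commits are allowed to finish before the reset.
 */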
2263 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2264 {
2265 	struct trace_buffer *buffer = buf->buffer;
2266 
2267 	if (!buffer)
2268 		return;
2269 
2270 	ring_buffer_record_disable(buffer);
2271 
2272 	/* Make sure all commits have finished */
2273 	synchronize_rcu();
2274 	ring_buffer_reset_cpu(buffer, cpu);
2275 
2276 	ring_buffer_record_enable(buffer);
2277 }
2278 
2279 void tracing_reset_online_cpus(struct array_buffer *buf)
2280 {
2281 	struct trace_buffer *buffer = buf->buffer;
2282 
2283 	if (!buffer)
2284 		return;
2285 
2286 	ring_buffer_record_disable(buffer);
2287 
2288 	/* Make sure all commits have finished */
2289 	synchronize_rcu();
2290 
2291 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2292 
2293 	ring_buffer_reset_online_cpus(buffer);
2294 
2295 	ring_buffer_record_enable(buffer);
2296 }
2297 
2298 /* Must have trace_types_lock held */
2299 void tracing_reset_all_online_cpus_unlocked(void)
2300 {
2301 	struct trace_array *tr;
2302 
2303 	lockdep_assert_held(&trace_types_lock);
2304 
2305 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2306 		if (!tr->clear_trace)
2307 			continue;
2308 		tr->clear_trace = false;
2309 		tracing_reset_online_cpus(&tr->array_buffer);
2310 #ifdef CONFIG_TRACER_MAX_TRACE
2311 		tracing_reset_online_cpus(&tr->max_buffer);
2312 #endif
2313 	}
2314 }
2315 
2316 void tracing_reset_all_online_cpus(void)
2317 {
2318 	mutex_lock(&trace_types_lock);
2319 	tracing_reset_all_online_cpus_unlocked();
2320 	mutex_unlock(&trace_types_lock);
2321 }
2322 
2323 /*
2324  * The tgid_map array maps from pid to tgid; i.e. the value stored at index i
2325  * is the tgid last observed corresponding to pid=i.
2326  */
2327 static int *tgid_map;
2328 
2329 /* The maximum valid index into tgid_map. */
2330 static size_t tgid_map_max;
2331 
2332 #define SAVED_CMDLINES_DEFAULT 128
2333 #define NO_CMDLINE_MAP UINT_MAX
2334 /*
2335  * Preemption must be disabled before acquiring trace_cmdline_lock.
2336  * The various trace_arrays' max_lock must be acquired in a context
2337  * where interrupts are disabled.
2338  */
2339 static arch_spinlock_t trace_cmdline_lock = __ARCH_SPIN_LOCK_UNLOCKED;
2340 struct saved_cmdlines_buffer {
2341 	unsigned map_pid_to_cmdline[PID_MAX_DEFAULT+1];
2342 	unsigned *map_cmdline_to_pid;
2343 	unsigned cmdline_num;
2344 	int cmdline_idx;
2345 	char saved_cmdlines[];
2346 };
2347 static struct saved_cmdlines_buffer *savedcmd;
2348 
2349 static inline char *get_saved_cmdlines(int idx)
2350 {
2351 	return &savedcmd->saved_cmdlines[idx * TASK_COMM_LEN];
2352 }
2353 
2354 static inline void set_cmdline(int idx, const char *cmdline)
2355 {
2356 	strncpy(get_saved_cmdlines(idx), cmdline, TASK_COMM_LEN);
2357 }
2358 
2359 static void free_saved_cmdlines_buffer(struct saved_cmdlines_buffer *s)
2360 {
2361 	int order = get_order(sizeof(*s) + s->cmdline_num * TASK_COMM_LEN);
2362 
2363 	kfree(s->map_cmdline_to_pid);
2364 	kmemleak_free(s);
2365 	free_pages((unsigned long)s, order);
2366 }
2367 
2368 static struct saved_cmdlines_buffer *allocate_cmdlines_buffer(unsigned int val)
2369 {
2370 	struct saved_cmdlines_buffer *s;
2371 	struct page *page;
2372 	int orig_size, size;
2373 	int order;
2374 
2375 	/* Figure out how much is needed to hold the given number of cmdlines */
2376 	orig_size = sizeof(*s) + val * TASK_COMM_LEN;
2377 	order = get_order(orig_size);
2378 	size = 1 << (order + PAGE_SHIFT);
2379 	page = alloc_pages(GFP_KERNEL, order);
2380 	if (!page)
2381 		return NULL;
2382 
2383 	s = page_address(page);
2384 	kmemleak_alloc(s, size, 1, GFP_KERNEL);
2385 	memset(s, 0, sizeof(*s));
2386 
2387 	/* Round up to actual allocation */
2388 	val = (size - sizeof(*s)) / TASK_COMM_LEN;
2389 	s->cmdline_num = val;
2390 
2391 	s->map_cmdline_to_pid = kmalloc_array(val,
2392 					      sizeof(*s->map_cmdline_to_pid),
2393 					      GFP_KERNEL);
2394 	if (!s->map_cmdline_to_pid) {
2395 		free_saved_cmdlines_buffer(s);
2396 		return NULL;
2397 	}
2398 
2399 	s->cmdline_idx = 0;
2400 	memset(&s->map_pid_to_cmdline, NO_CMDLINE_MAP,
2401 	       sizeof(s->map_pid_to_cmdline));
2402 	memset(s->map_cmdline_to_pid, NO_CMDLINE_MAP,
2403 	       val * sizeof(*s->map_cmdline_to_pid));
2404 
2405 	return s;
2406 }
2407 
2408 static int trace_create_savedcmd(void)
2409 {
2410 	savedcmd = allocate_cmdlines_buffer(SAVED_CMDLINES_DEFAULT);
2411 
2412 	return savedcmd ? 0 : -ENOMEM;
2413 }
2414 
2415 int is_tracing_stopped(void)
2416 {
2417 	return global_trace.stop_count;
2418 }
2419 
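/*
 * Re-enable recording on @tr's buffers once every preceding
 * tracing_stop_tr() call has been balanced.
 */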
2420 static void tracing_start_tr(struct trace_array *tr)
2421 {
2422 	struct trace_buffer *buffer;
2423 	unsigned long flags;
2424 
2425 	if (tracing_disabled)
2426 		return;
2427 
2428 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2429 	if (--tr->stop_count) {
2430 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2431 			/* Someone screwed up their debugging */
2432 			tr->stop_count = 0;
2433 		}
2434 		goto out;
2435 	}
2436 
2437 	/* Prevent the buffers from switching */
2438 	arch_spin_lock(&tr->max_lock);
2439 
2440 	buffer = tr->array_buffer.buffer;
2441 	if (buffer)
2442 		ring_buffer_record_enable(buffer);
2443 
2444 #ifdef CONFIG_TRACER_MAX_TRACE
2445 	buffer = tr->max_buffer.buffer;
2446 	if (buffer)
2447 		ring_buffer_record_enable(buffer);
2448 #endif
2449 
2450 	arch_spin_unlock(&tr->max_lock);
2451 
2452  out:
2453 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2454 }
2455 
2456 /**
2457  * tracing_start - quick start of the tracer
2458  *
2459  * If tracing is enabled but was stopped by tracing_stop,
2460  * this will start the tracer back up.
2461  */
2462 void tracing_start(void)
2463 {
2465 	return tracing_start_tr(&global_trace);
2466 }
2467 
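/* Disable recording on @tr's buffers; nests with tracing_start_tr(). */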
2468 static void tracing_stop_tr(struct trace_array *tr)
2469 {
2470 	struct trace_buffer *buffer;
2471 	unsigned long flags;
2472 
2473 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2474 	if (tr->stop_count++)
2475 		goto out;
2476 
2477 	/* Prevent the buffers from switching */
2478 	arch_spin_lock(&tr->max_lock);
2479 
2480 	buffer = tr->array_buffer.buffer;
2481 	if (buffer)
2482 		ring_buffer_record_disable(buffer);
2483 
2484 #ifdef CONFIG_TRACER_MAX_TRACE
2485 	buffer = tr->max_buffer.buffer;
2486 	if (buffer)
2487 		ring_buffer_record_disable(buffer);
2488 #endif
2489 
2490 	arch_spin_unlock(&tr->max_lock);
2491 
2492  out:
2493 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2494 }
2495 
2496 /**
2497  * tracing_stop - quick stop of the tracer
2498  *
2499  * Lightweight way to stop tracing. Use in conjunction with
2500  * tracing_start.
2501  */
2502 void tracing_stop(void)
2503 {
2504 	return tracing_stop_tr(&global_trace);
2505 }
2506 
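/*
 * Save tsk->comm in the saved_cmdlines table so it can be resolved later
 * by trace_find_cmdline(). Returns 1 on success, 0 if the cmdline lock
 * could not be taken without spinning.
 */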
2507 static int trace_save_cmdline(struct task_struct *tsk)
2508 {
2509 	unsigned tpid, idx;
2510 
2511 	/* treat recording of idle task as a success */
2512 	if (!tsk->pid)
2513 		return 1;
2514 
2515 	tpid = tsk->pid & (PID_MAX_DEFAULT - 1);
2516 
2517 	/*
2518 	 * It's not the end of the world if we don't get
2519 	 * the lock, but we also don't want to spin
2520 	 * nor do we want to disable interrupts,
2521 	 * so if we miss here, then better luck next time.
2522 	 *
2523 	 * This is called within the scheduler and wakeup paths, so interrupts
2524 	 * had better be disabled and the run queue lock had better be held.
2525 	 */
2526 	lockdep_assert_preemption_disabled();
2527 	if (!arch_spin_trylock(&trace_cmdline_lock))
2528 		return 0;
2529 
2530 	idx = savedcmd->map_pid_to_cmdline[tpid];
2531 	if (idx == NO_CMDLINE_MAP) {
2532 		idx = (savedcmd->cmdline_idx + 1) % savedcmd->cmdline_num;
2533 
2534 		savedcmd->map_pid_to_cmdline[tpid] = idx;
2535 		savedcmd->cmdline_idx = idx;
2536 	}
2537 
2538 	savedcmd->map_cmdline_to_pid[idx] = tsk->pid;
2539 	set_cmdline(idx, tsk->comm);
2540 
2541 	arch_spin_unlock(&trace_cmdline_lock);
2542 
2543 	return 1;
2544 }
2545 
2546 static void __trace_find_cmdline(int pid, char comm[])
2547 {
2548 	unsigned map;
2549 	int tpid;
2550 
2551 	if (!pid) {
2552 		strcpy(comm, "<idle>");
2553 		return;
2554 	}
2555 
2556 	if (WARN_ON_ONCE(pid < 0)) {
2557 		strcpy(comm, "<XXX>");
2558 		return;
2559 	}
2560 
2561 	tpid = pid & (PID_MAX_DEFAULT - 1);
2562 	map = savedcmd->map_pid_to_cmdline[tpid];
2563 	if (map != NO_CMDLINE_MAP) {
2564 		tpid = savedcmd->map_cmdline_to_pid[map];
2565 		if (tpid == pid) {
2566 			strscpy(comm, get_saved_cmdlines(map), TASK_COMM_LEN);
2567 			return;
2568 		}
2569 	}
2570 	strcpy(comm, "<...>");
2571 }
2572 
2573 void trace_find_cmdline(int pid, char comm[])
2574 {
2575 	preempt_disable();
2576 	arch_spin_lock(&trace_cmdline_lock);
2577 
2578 	__trace_find_cmdline(pid, comm);
2579 
2580 	arch_spin_unlock(&trace_cmdline_lock);
2581 	preempt_enable();
2582 }
2583 
2584 static int *trace_find_tgid_ptr(int pid)
2585 {
2586 	/*
2587 	 * Pairs with the smp_store_release in set_tracer_flag() to ensure that
2588 	 * if we observe a non-NULL tgid_map then we also observe the correct
2589 	 * tgid_map_max.
2590 	 */
2591 	int *map = smp_load_acquire(&tgid_map);
2592 
2593 	if (unlikely(!map || pid > tgid_map_max))
2594 		return NULL;
2595 
2596 	return &map[pid];
2597 }
2598 
2599 int trace_find_tgid(int pid)
2600 {
2601 	int *ptr = trace_find_tgid_ptr(pid);
2602 
2603 	return ptr ? *ptr : 0;
2604 }
2605 
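/*
 * Record the tgid of @tsk in tgid_map. Returns 1 on success, 0 if there is
 * no slot for this pid (map not allocated or pid too large).
 */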
2606 static int trace_save_tgid(struct task_struct *tsk)
2607 {
2608 	int *ptr;
2609 
2610 	/* treat recording of idle task as a success */
2611 	if (!tsk->pid)
2612 		return 1;
2613 
2614 	ptr = trace_find_tgid_ptr(tsk->pid);
2615 	if (!ptr)
2616 		return 0;
2617 
2618 	*ptr = tsk->tgid;
2619 	return 1;
2620 }
2621 
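/*
 * Return true when nothing needs recording: either neither the comm nor the
 * tgid was requested, or no trace event has marked this CPU for saving.
 */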
2622 static bool tracing_record_taskinfo_skip(int flags)
2623 {
2624 	if (unlikely(!(flags & (TRACE_RECORD_CMDLINE | TRACE_RECORD_TGID))))
2625 		return true;
2626 	if (!__this_cpu_read(trace_taskinfo_save))
2627 		return true;
2628 	return false;
2629 }
2630 
2631 /**
2632  * tracing_record_taskinfo - record the task info of a task
2633  *
2634  * @task:  task to record
2635  * @flags: TRACE_RECORD_CMDLINE for recording comm
2636  *         TRACE_RECORD_TGID for recording tgid
2637  */
2638 void tracing_record_taskinfo(struct task_struct *task, int flags)
2639 {
2640 	bool done;
2641 
2642 	if (tracing_record_taskinfo_skip(flags))
2643 		return;
2644 
2645 	/*
2646 	 * Record as much task information as possible. If some fail, continue
2647 	 * to try to record the others.
2648 	 */
2649 	done = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(task);
2650 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(task);
2651 
2652 	/* If recording any information failed, retry again soon. */
2653 	if (!done)
2654 		return;
2655 
2656 	__this_cpu_write(trace_taskinfo_save, false);
2657 }
2658 
2659 /**
2660  * tracing_record_taskinfo_sched_switch - record task info for sched_switch
2661  *
2662  * @prev: previous task during sched_switch
2663  * @next: next task during sched_switch
2664  * @flags: TRACE_RECORD_CMDLINE for recording comm
2665  *         TRACE_RECORD_TGID for recording tgid
2666  */
2667 void tracing_record_taskinfo_sched_switch(struct task_struct *prev,
2668 					  struct task_struct *next, int flags)
2669 {
2670 	bool done;
2671 
2672 	if (tracing_record_taskinfo_skip(flags))
2673 		return;
2674 
2675 	/*
2676 	 * Record as much task information as possible. If some fail, continue
2677 	 * to try to record the others.
2678 	 */
2679 	done  = !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(prev);
2680 	done &= !(flags & TRACE_RECORD_CMDLINE) || trace_save_cmdline(next);
2681 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(prev);
2682 	done &= !(flags & TRACE_RECORD_TGID) || trace_save_tgid(next);
2683 
2684 	/* If recording any information failed, retry again soon. */
2685 	if (!done)
2686 		return;
2687 
2688 	__this_cpu_write(trace_taskinfo_save, false);
2689 }
2690 
2691 /* Helpers to record a specific task information */
2692 void tracing_record_cmdline(struct task_struct *task)
2693 {
2694 	tracing_record_taskinfo(task, TRACE_RECORD_CMDLINE);
2695 }
2696 
2697 void tracing_record_tgid(struct task_struct *task)
2698 {
2699 	tracing_record_taskinfo(task, TRACE_RECORD_TGID);
2700 }
2701 
2702 /*
2703  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2704  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2705  * simplifies those functions and keeps them in sync.
2706  */
2707 enum print_line_t trace_handle_return(struct trace_seq *s)
2708 {
2709 	return trace_seq_has_overflowed(s) ?
2710 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2711 }
2712 EXPORT_SYMBOL_GPL(trace_handle_return);
2713 
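/* Current task's migration-disable depth, or 0 on !SMP kernels. */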
2714 static unsigned short migration_disable_value(void)
2715 {
2716 #if defined(CONFIG_SMP)
2717 	return current->migration_disabled;
2718 #else
2719 	return 0;
2720 #endif
2721 }
2722 
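/*
 * Build the trace_ctx word from the given irq status: irq/softirq/NMI/BH
 * and resched flags go in the upper 16 bits, the preemption count in the
 * low nibble and the migration-disable count in bits 4-7.
 */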
2723 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2724 {
2725 	unsigned int trace_flags = irqs_status;
2726 	unsigned int pc;
2727 
2728 	pc = preempt_count();
2729 
2730 	if (pc & NMI_MASK)
2731 		trace_flags |= TRACE_FLAG_NMI;
2732 	if (pc & HARDIRQ_MASK)
2733 		trace_flags |= TRACE_FLAG_HARDIRQ;
2734 	if (in_serving_softirq())
2735 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2736 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2737 		trace_flags |= TRACE_FLAG_BH_OFF;
2738 
2739 	if (tif_need_resched())
2740 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2741 	if (test_preempt_need_resched())
2742 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2743 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2744 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2745 }
2746 
2747 struct ring_buffer_event *
2748 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2749 			  int type,
2750 			  unsigned long len,
2751 			  unsigned int trace_ctx)
2752 {
2753 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2754 }
2755 
2756 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2757 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2758 static int trace_buffered_event_ref;
2759 
2760 /**
2761  * trace_buffered_event_enable - enable buffering events
2762  *
2763  * When events are being filtered, it is quicker to use a temporary
2764  * buffer to write the event data into if there's a likely chance
2765  * that it will not be committed. Discarding an event from the ring
2766  * buffer is not as fast as committing one, and is much slower than
2767  * copying the data over on a commit.
2768  *
2769  * When an event is to be filtered, allocate per cpu buffers to
2770  * write the event data into. If the event is then filtered and discarded,
2771  * it is simply dropped; otherwise the entire event is committed to the
2772  * ring buffer in one shot.
2773  */
2774 void trace_buffered_event_enable(void)
2775 {
2776 	struct ring_buffer_event *event;
2777 	struct page *page;
2778 	int cpu;
2779 
2780 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2781 
2782 	if (trace_buffered_event_ref++)
2783 		return;
2784 
2785 	for_each_tracing_cpu(cpu) {
2786 		page = alloc_pages_node(cpu_to_node(cpu),
2787 					GFP_KERNEL | __GFP_NORETRY, 0);
2788 		/* This is just an optimization and can handle failures */
2789 		if (!page) {
2790 			pr_err("Failed to allocate event buffer\n");
2791 			break;
2792 		}
2793 
2794 		event = page_address(page);
2795 		memset(event, 0, sizeof(*event));
2796 
2797 		per_cpu(trace_buffered_event, cpu) = event;
2798 
2799 		preempt_disable();
2800 		if (cpu == smp_processor_id() &&
2801 		    __this_cpu_read(trace_buffered_event) !=
2802 		    per_cpu(trace_buffered_event, cpu))
2803 			WARN_ON_ONCE(1);
2804 		preempt_enable();
2805 	}
2806 }
2807 
2808 static void enable_trace_buffered_event(void *data)
2809 {
2810 	/* Probably not needed, but do it anyway */
2811 	smp_rmb();
2812 	this_cpu_dec(trace_buffered_event_cnt);
2813 }
2814 
2815 static void disable_trace_buffered_event(void *data)
2816 {
2817 	this_cpu_inc(trace_buffered_event_cnt);
2818 }
2819 
2820 /**
2821  * trace_buffered_event_disable - disable buffering events
2822  *
2823  * When a filter is removed, it is faster to not use the buffered
2824  * events, and to commit directly into the ring buffer. Free up
2825  * the temp buffers when there are no more users. This requires
2826  * special synchronization with current events.
2827  */
2828 void trace_buffered_event_disable(void)
2829 {
2830 	int cpu;
2831 
2832 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2833 
2834 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2835 		return;
2836 
2837 	if (--trace_buffered_event_ref)
2838 		return;
2839 
2840 	/* For each CPU, set the buffer as used. */
2841 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2842 			 NULL, true);
2843 
2844 	/* Wait for all current users to finish */
2845 	synchronize_rcu();
2846 
2847 	for_each_tracing_cpu(cpu) {
2848 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2849 		per_cpu(trace_buffered_event, cpu) = NULL;
2850 	}
2851 
2852 	/*
2853 	 * Wait for all CPUs that may have started checking whether they can
2854 	 * use their event buffer only after the previous synchronize_rcu()
2855 	 * call, while still reading a valid pointer from trace_buffered_event.
2856 	 * They must not see the cleared trace_buffered_event_cnt, or they
2857 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2858 	 */
2859 	synchronize_rcu();
2860 
2861 	/* For each CPU, relinquish the buffer */
2862 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2863 			 true);
2864 }
2865 
2866 static struct trace_buffer *temp_buffer;
2867 
2868 struct ring_buffer_event *
2869 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2870 			  struct trace_event_file *trace_file,
2871 			  int type, unsigned long len,
2872 			  unsigned int trace_ctx)
2873 {
2874 	struct ring_buffer_event *entry;
2875 	struct trace_array *tr = trace_file->tr;
2876 	int val;
2877 
2878 	*current_rb = tr->array_buffer.buffer;
2879 
2880 	if (!tr->no_filter_buffering_ref &&
2881 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2882 		preempt_disable_notrace();
2883 		/*
2884 		 * Filtering is on, so try to use the per cpu buffer first.
2885 		 * This buffer will simulate a ring_buffer_event,
2886 		 * where the type_len is zero and the array[0] will
2887 		 * hold the full length.
2888 		 * (see include/linux/ring-buffer.h for details on
2889 		 * (see include/linux/ring_buffer.h for details on
2890 		 *
2891 		 * Using a temp buffer during filtering and copying it
2892 		 * on a matched filter is quicker than writing directly
2893 		 * into the ring buffer and then discarding it when
2894 		 * it doesn't match. That is because the discard
2895 		 * requires several atomic operations to get right.
2896 		 * Copying on a match and doing nothing on a failed match
2897 		 * is still quicker than skipping the copy on a match but
2898 		 * having to discard from the ring buffer on a failed match.
2899 		 */
2900 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2901 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2902 
2903 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2904 
2905 			/*
2906 			 * Preemption is disabled, but interrupts and NMIs
2907 			 * can still come in now. If that happens after
2908 			 * the above increment, then it will have to go
2909 			 * back to the old method of allocating the event
2910 			 * on the ring buffer, and if the filter fails, it
2911 			 * will have to call ring_buffer_discard_commit()
2912 			 * to remove it.
2913 			 *
2914 			 * Need to also check the unlikely case that the
2915 			 * length is bigger than the temp buffer size.
2916 			 * If that happens, then the reserve is pretty much
2917 			 * guaranteed to fail, as the ring buffer currently
2918 			 * only allows events less than a page. But that may
2919 			 * change in the future, so let the ring buffer reserve
2920 			 * handle the failure in that case.
2921 			 */
2922 			if (val == 1 && likely(len <= max_len)) {
2923 				trace_event_setup(entry, type, trace_ctx);
2924 				entry->array[0] = len;
2925 				/* Return with preemption disabled */
2926 				return entry;
2927 			}
2928 			this_cpu_dec(trace_buffered_event_cnt);
2929 		}
2930 		/* __trace_buffer_lock_reserve() disables preemption */
2931 		preempt_enable_notrace();
2932 	}
2933 
2934 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2935 					    trace_ctx);
2936 	/*
2937 	 * If tracing is off, but we have triggers enabled
2938 	 * we still need to look at the event data. Use the temp_buffer
2939 	 * to store the trace event for the trigger to use. It's recursion
2940 	 * safe and will not be recorded anywhere.
2941 	 */
2942 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2943 		*current_rb = temp_buffer;
2944 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2945 						    trace_ctx);
2946 	}
2947 	return entry;
2948 }
2949 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2950 
2951 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2952 static DEFINE_MUTEX(tracepoint_printk_mutex);
2953 
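/*
 * Print a single trace event straight to the console. Used when the
 * tp_printk boot option has enabled tracepoint_printk.
 */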
2954 static void output_printk(struct trace_event_buffer *fbuffer)
2955 {
2956 	struct trace_event_call *event_call;
2957 	struct trace_event_file *file;
2958 	struct trace_event *event;
2959 	unsigned long flags;
2960 	struct trace_iterator *iter = tracepoint_print_iter;
2961 
2962 	/* We should never get here if iter is NULL */
2963 	if (WARN_ON_ONCE(!iter))
2964 		return;
2965 
2966 	event_call = fbuffer->trace_file->event_call;
2967 	if (!event_call || !event_call->event.funcs ||
2968 	    !event_call->event.funcs->trace)
2969 		return;
2970 
2971 	file = fbuffer->trace_file;
2972 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2973 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2974 	     !filter_match_preds(file->filter, fbuffer->entry)))
2975 		return;
2976 
2977 	event = &fbuffer->trace_file->event_call->event;
2978 
2979 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2980 	trace_seq_init(&iter->seq);
2981 	iter->ent = fbuffer->entry;
2982 	event_call->event.funcs->trace(iter, 0, event);
2983 	trace_seq_putc(&iter->seq, 0);
2984 	printk("%s", iter->seq.buffer);
2985 
2986 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2987 }
2988 
2989 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
2990 			     void *buffer, size_t *lenp,
2991 			     loff_t *ppos)
2992 {
2993 	int save_tracepoint_printk;
2994 	int ret;
2995 
2996 	mutex_lock(&tracepoint_printk_mutex);
2997 	save_tracepoint_printk = tracepoint_printk;
2998 
2999 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
3000 
3001 	/*
3002 	 * This will force exiting early, as tracepoint_printk
3003 	 * is always zero when tracepoint_print_iter is not allocated.
3004 	 */
3005 	if (!tracepoint_print_iter)
3006 		tracepoint_printk = 0;
3007 
3008 	if (save_tracepoint_printk == tracepoint_printk)
3009 		goto out;
3010 
3011 	if (tracepoint_printk)
3012 		static_key_enable(&tracepoint_printk_key.key);
3013 	else
3014 		static_key_disable(&tracepoint_printk_key.key);
3015 
3016  out:
3017 	mutex_unlock(&tracepoint_printk_mutex);
3018 
3019 	return ret;
3020 }
3021 
3022 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
3023 {
3024 	enum event_trigger_type tt = ETT_NONE;
3025 	struct trace_event_file *file = fbuffer->trace_file;
3026 
3027 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
3028 			fbuffer->entry, &tt))
3029 		goto discard;
3030 
3031 	if (static_key_false(&tracepoint_printk_key.key))
3032 		output_printk(fbuffer);
3033 
3034 	if (static_branch_unlikely(&trace_event_exports_enabled))
3035 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
3036 
3037 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
3038 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
3039 
3040 discard:
3041 	if (tt)
3042 		event_triggers_post_call(file, tt);
3043 
3044 }
3045 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
3046 
3047 /*
3048  * Skip 3:
3049  *
3050  *   trace_buffer_unlock_commit_regs()
3051  *   trace_event_buffer_commit()
3052  *   trace_event_raw_event_xxx()
3053  */
3054 # define STACK_SKIP 3
3055 
3056 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
3057 				     struct trace_buffer *buffer,
3058 				     struct ring_buffer_event *event,
3059 				     unsigned int trace_ctx,
3060 				     struct pt_regs *regs)
3061 {
3062 	__buffer_unlock_commit(buffer, event);
3063 
3064 	/*
3065 	 * If regs is not set, then skip the necessary functions.
3066 	 * Note, we can still get here via blktrace, wakeup tracer
3067 	 * and mmiotrace, but that's ok if they lose a function or
3068 	 * two. They are not that meaningful.
3069 	 */
3070 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
3071 	ftrace_trace_userstack(tr, buffer, trace_ctx);
3072 }
3073 
3074 /*
3075  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
3076  */
3077 void
3078 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
3079 				   struct ring_buffer_event *event)
3080 {
3081 	__buffer_unlock_commit(buffer, event);
3082 }
3083 
3084 void
3085 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
3086 	       parent_ip, unsigned int trace_ctx)
3087 {
3088 	struct trace_event_call *call = &event_function;
3089 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3090 	struct ring_buffer_event *event;
3091 	struct ftrace_entry *entry;
3092 
3093 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
3094 					    trace_ctx);
3095 	if (!event)
3096 		return;
3097 	entry	= ring_buffer_event_data(event);
3098 	entry->ip			= ip;
3099 	entry->parent_ip		= parent_ip;
3100 
3101 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3102 		if (static_branch_unlikely(&trace_function_exports_enabled))
3103 			ftrace_exports(event, TRACE_EXPORT_FUNCTION);
3104 		__buffer_unlock_commit(buffer, event);
3105 	}
3106 }
3107 
3108 #ifdef CONFIG_STACKTRACE
3109 
3110 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
3111 #define FTRACE_KSTACK_NESTING	4
3112 
3113 #define FTRACE_KSTACK_ENTRIES	(PAGE_SIZE / FTRACE_KSTACK_NESTING)
3114 
3115 struct ftrace_stack {
3116 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
3117 };
3118 
3119 
3120 struct ftrace_stacks {
3121 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
3122 };
3123 
3124 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
3125 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
3126 
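/*
 * Save a kernel stack trace into the trace buffer. Per-CPU pre-allocated
 * stacks (one per nesting context) are used so this works from normal,
 * softirq, irq and NMI context without allocating on the fly.
 */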
3127 static void __ftrace_trace_stack(struct trace_buffer *buffer,
3128 				 unsigned int trace_ctx,
3129 				 int skip, struct pt_regs *regs)
3130 {
3131 	struct trace_event_call *call = &event_kernel_stack;
3132 	struct ring_buffer_event *event;
3133 	unsigned int size, nr_entries;
3134 	struct ftrace_stack *fstack;
3135 	struct stack_entry *entry;
3136 	int stackidx;
3137 
3138 	/*
3139 	 * Add one for this function and the call to save_stack_trace().
3140 	 * If regs is set, then these functions will not be in the way.
3141 	 */
3142 #ifndef CONFIG_UNWINDER_ORC
3143 	if (!regs)
3144 		skip++;
3145 #endif
3146 
3147 	preempt_disable_notrace();
3148 
3149 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
3150 
3151 	/* This should never happen. If it does, yell once and skip */
3152 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
3153 		goto out;
3154 
3155 	/*
3156 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
3157 	 * interrupt will either see the value pre increment or post
3158 	 * increment. If the interrupt happens pre increment it will have
3159 	 * restored the counter when it returns.  We just need a barrier to
3160 	 * keep gcc from moving things around.
3161 	 */
3162 	barrier();
3163 
3164 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
3165 	size = ARRAY_SIZE(fstack->calls);
3166 
3167 	if (regs) {
3168 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
3169 						   size, skip);
3170 	} else {
3171 		nr_entries = stack_trace_save(fstack->calls, size, skip);
3172 	}
3173 
3174 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3175 				    struct_size(entry, caller, nr_entries),
3176 				    trace_ctx);
3177 	if (!event)
3178 		goto out;
3179 	entry = ring_buffer_event_data(event);
3180 
3181 	entry->size = nr_entries;
3182 	memcpy(&entry->caller, fstack->calls,
3183 	       flex_array_size(entry, caller, nr_entries));
3184 
3185 	if (!call_filter_check_discard(call, entry, buffer, event))
3186 		__buffer_unlock_commit(buffer, event);
3187 
3188  out:
3189 	/* Again, don't let gcc optimize things here */
3190 	barrier();
3191 	__this_cpu_dec(ftrace_stack_reserve);
3192 	preempt_enable_notrace();
3193 
3194 }
3195 
3196 static inline void ftrace_trace_stack(struct trace_array *tr,
3197 				      struct trace_buffer *buffer,
3198 				      unsigned int trace_ctx,
3199 				      int skip, struct pt_regs *regs)
3200 {
3201 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3202 		return;
3203 
3204 	__ftrace_trace_stack(buffer, trace_ctx, skip, regs);
3205 }
3206 
3207 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3208 		   int skip)
3209 {
3210 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3211 
3212 	if (rcu_is_watching()) {
3213 		__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3214 		return;
3215 	}
3216 
3217 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3218 		return;
3219 
3220 	/*
3221 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3222 	 * but if the above rcu_is_watching() failed, then the NMI
3223 	 * triggered someplace critical, and ct_irq_enter() should
3224 	 * not be called from NMI.
3225 	 */
3226 	if (unlikely(in_nmi()))
3227 		return;
3228 
3229 	ct_irq_enter_irqson();
3230 	__ftrace_trace_stack(buffer, trace_ctx, skip, NULL);
3231 	ct_irq_exit_irqson();
3232 }
3233 
3234 /**
3235  * trace_dump_stack - record a stack back trace in the trace buffer
3236  * @skip: Number of functions to skip (helper handlers)
3237  */
3238 void trace_dump_stack(int skip)
3239 {
3240 	if (tracing_disabled || tracing_selftest_running)
3241 		return;
3242 
3243 #ifndef CONFIG_UNWINDER_ORC
3244 	/* Skip 1 to skip this function. */
3245 	skip++;
3246 #endif
3247 	__ftrace_trace_stack(global_trace.array_buffer.buffer,
3248 			     tracing_gen_ctx(), skip, NULL);
3249 }
3250 EXPORT_SYMBOL_GPL(trace_dump_stack);
3251 
3252 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3253 static DEFINE_PER_CPU(int, user_stack_count);
3254 
3255 static void
3256 ftrace_trace_userstack(struct trace_array *tr,
3257 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3258 {
3259 	struct trace_event_call *call = &event_user_stack;
3260 	struct ring_buffer_event *event;
3261 	struct userstack_entry *entry;
3262 
3263 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3264 		return;
3265 
3266 	/*
3267 	 * NMIs cannot handle page faults, even with fixups.
3268 	 * Saving the user stack can (and often does) fault.
3269 	 */
3270 	if (unlikely(in_nmi()))
3271 		return;
3272 
3273 	/*
3274 	 * prevent recursion, since the user stack tracing may
3275 	 * trigger other kernel events.
3276 	 */
3277 	preempt_disable();
3278 	if (__this_cpu_read(user_stack_count))
3279 		goto out;
3280 
3281 	__this_cpu_inc(user_stack_count);
3282 
3283 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3284 					    sizeof(*entry), trace_ctx);
3285 	if (!event)
3286 		goto out_drop_count;
3287 	entry	= ring_buffer_event_data(event);
3288 
3289 	entry->tgid		= current->tgid;
3290 	memset(&entry->caller, 0, sizeof(entry->caller));
3291 
3292 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3293 	if (!call_filter_check_discard(call, entry, buffer, event))
3294 		__buffer_unlock_commit(buffer, event);
3295 
3296  out_drop_count:
3297 	__this_cpu_dec(user_stack_count);
3298  out:
3299 	preempt_enable();
3300 }
3301 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3302 static void ftrace_trace_userstack(struct trace_array *tr,
3303 				   struct trace_buffer *buffer,
3304 				   unsigned int trace_ctx)
3305 {
3306 }
3307 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3308 
3309 #endif /* CONFIG_STACKTRACE */
3310 
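/* Split a 64-bit timestamp delta into the two 32-bit halves of the entry. */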
3311 static inline void
3312 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3313 			  unsigned long long delta)
3314 {
3315 	entry->bottom_delta_ts = delta & U32_MAX;
3316 	entry->top_delta_ts = (delta >> 32);
3317 }
3318 
3319 void trace_last_func_repeats(struct trace_array *tr,
3320 			     struct trace_func_repeats *last_info,
3321 			     unsigned int trace_ctx)
3322 {
3323 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3324 	struct func_repeats_entry *entry;
3325 	struct ring_buffer_event *event;
3326 	u64 delta;
3327 
3328 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3329 					    sizeof(*entry), trace_ctx);
3330 	if (!event)
3331 		return;
3332 
3333 	delta = ring_buffer_event_time_stamp(buffer, event) -
3334 		last_info->ts_last_call;
3335 
3336 	entry = ring_buffer_event_data(event);
3337 	entry->ip = last_info->ip;
3338 	entry->parent_ip = last_info->parent_ip;
3339 	entry->count = last_info->count;
3340 	func_repeats_set_delta_ts(entry, delta);
3341 
3342 	__buffer_unlock_commit(buffer, event);
3343 }
3344 
3345 /* created for use with alloc_percpu */
3346 struct trace_buffer_struct {
3347 	int nesting;
3348 	char buffer[4][TRACE_BUF_SIZE];
3349 };
3350 
3351 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3352 
3353 /*
3354  * This allows for lockless recording.  If we're nested too deeply, then
3355  * this returns NULL.
3356  */
3357 static char *get_trace_buf(void)
3358 {
3359 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3360 
3361 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3362 		return NULL;
3363 
3364 	buffer->nesting++;
3365 
3366 	/* Interrupts must see nesting incremented before we use the buffer */
3367 	barrier();
3368 	return &buffer->buffer[buffer->nesting - 1][0];
3369 }
3370 
3371 static void put_trace_buf(void)
3372 {
3373 	/* Don't let the decrement of nesting leak before this */
3374 	barrier();
3375 	this_cpu_dec(trace_percpu_buffer->nesting);
3376 }
3377 
3378 static int alloc_percpu_trace_buffer(void)
3379 {
3380 	struct trace_buffer_struct __percpu *buffers;
3381 
3382 	if (trace_percpu_buffer)
3383 		return 0;
3384 
3385 	buffers = alloc_percpu(struct trace_buffer_struct);
3386 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3387 		return -ENOMEM;
3388 
3389 	trace_percpu_buffer = buffers;
3390 	return 0;
3391 }
3392 
3393 static int buffers_allocated;
3394 
3395 void trace_printk_init_buffers(void)
3396 {
3397 	if (buffers_allocated)
3398 		return;
3399 
3400 	if (alloc_percpu_trace_buffer())
3401 		return;
3402 
3403 	/* trace_printk() is for debug use only. Don't use it in production. */
3404 
3405 	pr_warn("\n");
3406 	pr_warn("**********************************************************\n");
3407 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3408 	pr_warn("**                                                      **\n");
3409 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3410 	pr_warn("**                                                      **\n");
3411 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3412 	pr_warn("** unsafe for production use.                           **\n");
3413 	pr_warn("**                                                      **\n");
3414 	pr_warn("** If you see this message and you are not debugging    **\n");
3415 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3416 	pr_warn("**                                                      **\n");
3417 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3418 	pr_warn("**********************************************************\n");
3419 
3420 	/* Expand the buffers to set size */
3421 	tracing_update_buffers(&global_trace);
3422 
3423 	buffers_allocated = 1;
3424 
3425 	/*
3426 	 * trace_printk_init_buffers() can be called by modules.
3427 	 * If that happens, then we need to start cmdline recording
3428 	 * directly here. If the global_trace.buffer is already
3429 	 * allocated here, then this was called by module code.
3430 	 */
3431 	if (global_trace.array_buffer.buffer)
3432 		tracing_start_cmdline_record();
3433 }
3434 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3435 
3436 void trace_printk_start_comm(void)
3437 {
3438 	/* Start tracing comms if trace printk is set */
3439 	if (!buffers_allocated)
3440 		return;
3441 	tracing_start_cmdline_record();
3442 }
3443 
3444 static void trace_printk_start_stop_comm(int enabled)
3445 {
3446 	if (!buffers_allocated)
3447 		return;
3448 
3449 	if (enabled)
3450 		tracing_start_cmdline_record();
3451 	else
3452 		tracing_stop_cmdline_record();
3453 }
3454 
3455 /**
3456  * trace_vbprintk - write binary msg to tracing buffer
3457  * @ip:    The address of the caller
3458  * @fmt:   The string format to write to the buffer
3459  * @args:  Arguments for @fmt
3460  */
3461 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3462 {
3463 	struct trace_event_call *call = &event_bprint;
3464 	struct ring_buffer_event *event;
3465 	struct trace_buffer *buffer;
3466 	struct trace_array *tr = &global_trace;
3467 	struct bprint_entry *entry;
3468 	unsigned int trace_ctx;
3469 	char *tbuffer;
3470 	int len = 0, size;
3471 
3472 	if (unlikely(tracing_selftest_running || tracing_disabled))
3473 		return 0;
3474 
3475 	/* Don't pollute graph traces with trace_vprintk internals */
3476 	pause_graph_tracing();
3477 
3478 	trace_ctx = tracing_gen_ctx();
3479 	preempt_disable_notrace();
3480 
3481 	tbuffer = get_trace_buf();
3482 	if (!tbuffer) {
3483 		len = 0;
3484 		goto out_nobuffer;
3485 	}
3486 
3487 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3488 
3489 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3490 		goto out_put;
3491 
3492 	size = sizeof(*entry) + sizeof(u32) * len;
3493 	buffer = tr->array_buffer.buffer;
3494 	ring_buffer_nest_start(buffer);
3495 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3496 					    trace_ctx);
3497 	if (!event)
3498 		goto out;
3499 	entry = ring_buffer_event_data(event);
3500 	entry->ip			= ip;
3501 	entry->fmt			= fmt;
3502 
3503 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3504 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3505 		__buffer_unlock_commit(buffer, event);
3506 		ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3507 	}
3508 
3509 out:
3510 	ring_buffer_nest_end(buffer);
3511 out_put:
3512 	put_trace_buf();
3513 
3514 out_nobuffer:
3515 	preempt_enable_notrace();
3516 	unpause_graph_tracing();
3517 
3518 	return len;
3519 }
3520 EXPORT_SYMBOL_GPL(trace_vbprintk);
3521 
3522 __printf(3, 0)
3523 static int
3524 __trace_array_vprintk(struct trace_buffer *buffer,
3525 		      unsigned long ip, const char *fmt, va_list args)
3526 {
3527 	struct trace_event_call *call = &event_print;
3528 	struct ring_buffer_event *event;
3529 	int len = 0, size;
3530 	struct print_entry *entry;
3531 	unsigned int trace_ctx;
3532 	char *tbuffer;
3533 
3534 	if (tracing_disabled)
3535 		return 0;
3536 
3537 	/* Don't pollute graph traces with trace_vprintk internals */
3538 	pause_graph_tracing();
3539 
3540 	trace_ctx = tracing_gen_ctx();
3541 	preempt_disable_notrace();
3542 
3544 	tbuffer = get_trace_buf();
3545 	if (!tbuffer) {
3546 		len = 0;
3547 		goto out_nobuffer;
3548 	}
3549 
3550 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3551 
3552 	size = sizeof(*entry) + len + 1;
3553 	ring_buffer_nest_start(buffer);
3554 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3555 					    trace_ctx);
3556 	if (!event)
3557 		goto out;
3558 	entry = ring_buffer_event_data(event);
3559 	entry->ip = ip;
3560 
3561 	memcpy(&entry->buf, tbuffer, len + 1);
3562 	if (!call_filter_check_discard(call, entry, buffer, event)) {
3563 		__buffer_unlock_commit(buffer, event);
3564 		ftrace_trace_stack(&global_trace, buffer, trace_ctx, 6, NULL);
3565 	}
3566 
3567 out:
3568 	ring_buffer_nest_end(buffer);
3569 	put_trace_buf();
3570 
3571 out_nobuffer:
3572 	preempt_enable_notrace();
3573 	unpause_graph_tracing();
3574 
3575 	return len;
3576 }
3577 
3578 __printf(3, 0)
3579 int trace_array_vprintk(struct trace_array *tr,
3580 			unsigned long ip, const char *fmt, va_list args)
3581 {
3582 	if (tracing_selftest_running && tr == &global_trace)
3583 		return 0;
3584 
3585 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3586 }
3587 
3588 /**
3589  * trace_array_printk - Print a message to a specific instance
3590  * @tr: The instance trace_array descriptor
3591  * @ip: The instruction pointer that this is called from.
3592  * @fmt: The format to print (printf format)
3593  *
3594  * If a subsystem sets up its own instance, it has the right to
3595  * printk strings into its tracing instance buffer using this
3596  * function. Note, this function will not write into the top level
3597  * buffer (use trace_printk() for that), as the top level buffer
3598  * should only contain events that can be individually disabled.
3599  * trace_printk() is only used for debugging a kernel, and should
3600  * never be incorporated into normal use.
3601  *
3602  * trace_array_printk() can be used, as it will not add noise to the
3603  * top level tracing buffer.
3604  *
3605  * Note, trace_array_init_printk() must be called on @tr before this
3606  * can be used.
3607  */
3608 __printf(3, 0)
3609 int trace_array_printk(struct trace_array *tr,
3610 		       unsigned long ip, const char *fmt, ...)
3611 {
3612 	int ret;
3613 	va_list ap;
3614 
3615 	if (!tr)
3616 		return -ENOENT;
3617 
3618 	/* This is only allowed for created instances */
3619 	if (tr == &global_trace)
3620 		return 0;
3621 
3622 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3623 		return 0;
3624 
3625 	va_start(ap, fmt);
3626 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3627 	va_end(ap);
3628 	return ret;
3629 }
3630 EXPORT_SYMBOL_GPL(trace_array_printk);
3631 
3632 /**
3633  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3634  * @tr: The trace array to initialize the buffers for
3635  *
3636  * As trace_array_printk() only writes into instances, it is OK to
3637  * have it in the kernel (unlike trace_printk()). This needs to be called
3638  * before trace_array_printk() can be used on a trace_array.
3639  */
3640 int trace_array_init_printk(struct trace_array *tr)
3641 {
3642 	if (!tr)
3643 		return -ENOENT;
3644 
3645 	/* This is only allowed for created instances */
3646 	if (tr == &global_trace)
3647 		return -EINVAL;
3648 
3649 	return alloc_percpu_trace_buffer();
3650 }
3651 EXPORT_SYMBOL_GPL(trace_array_init_printk);
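/*
 * Illustrative, untested usage sketch for a subsystem-created instance;
 * the names my_subsys_trace_array and n below are hypothetical:
 *
 *	struct trace_array *tr = my_subsys_trace_array;	// instance created elsewhere
 *
 *	if (!trace_array_init_printk(tr))
 *		trace_array_printk(tr, _THIS_IP_, "processed %d items\n", n);
 */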
3652 
3653 __printf(3, 4)
3654 int trace_array_printk_buf(struct trace_buffer *buffer,
3655 			   unsigned long ip, const char *fmt, ...)
3656 {
3657 	int ret;
3658 	va_list ap;
3659 
3660 	if (!(global_trace.trace_flags & TRACE_ITER_PRINTK))
3661 		return 0;
3662 
3663 	va_start(ap, fmt);
3664 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3665 	va_end(ap);
3666 	return ret;
3667 }
3668 
3669 __printf(2, 0)
3670 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3671 {
3672 	return trace_array_vprintk(&global_trace, ip, fmt, args);
3673 }
3674 EXPORT_SYMBOL_GPL(trace_vprintk);
3675 
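/* Advance the iterator to the next entry on its current CPU. */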
3676 static void trace_iterator_increment(struct trace_iterator *iter)
3677 {
3678 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3679 
3680 	iter->idx++;
3681 	if (buf_iter)
3682 		ring_buffer_iter_advance(buf_iter);
3683 }
3684 
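/*
 * Peek at the next entry for @cpu without consuming it. Returns the entry
 * data (with *ts and *lost_events filled in) or NULL if the buffer is empty.
 */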
3685 static struct trace_entry *
3686 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3687 		unsigned long *lost_events)
3688 {
3689 	struct ring_buffer_event *event;
3690 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3691 
3692 	if (buf_iter) {
3693 		event = ring_buffer_iter_peek(buf_iter, ts);
3694 		if (lost_events)
3695 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3696 				(unsigned long)-1 : 0;
3697 	} else {
3698 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3699 					 lost_events);
3700 	}
3701 
3702 	if (event) {
3703 		iter->ent_size = ring_buffer_event_length(event);
3704 		return ring_buffer_event_data(event);
3705 	}
3706 	iter->ent_size = 0;
3707 	return NULL;
3708 }
3709 
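/*
 * Find the oldest pending entry across the CPUs the iterator covers and
 * report which CPU it came from, its timestamp and any lost events.
 */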
3710 static struct trace_entry *
3711 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3712 		  unsigned long *missing_events, u64 *ent_ts)
3713 {
3714 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3715 	struct trace_entry *ent, *next = NULL;
3716 	unsigned long lost_events = 0, next_lost = 0;
3717 	int cpu_file = iter->cpu_file;
3718 	u64 next_ts = 0, ts;
3719 	int next_cpu = -1;
3720 	int next_size = 0;
3721 	int cpu;
3722 
3723 	/*
3724 	 * If we are in a per_cpu trace file, don't bother iterating over
3725 	 * all CPUs; peek at that CPU directly.
3726 	 */
3727 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3728 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3729 			return NULL;
3730 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3731 		if (ent_cpu)
3732 			*ent_cpu = cpu_file;
3733 
3734 		return ent;
3735 	}
3736 
3737 	for_each_tracing_cpu(cpu) {
3738 
3739 		if (ring_buffer_empty_cpu(buffer, cpu))
3740 			continue;
3741 
3742 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3743 
3744 		/*
3745 		 * Pick the entry with the smallest timestamp:
3746 		 */
3747 		if (ent && (!next || ts < next_ts)) {
3748 			next = ent;
3749 			next_cpu = cpu;
3750 			next_ts = ts;
3751 			next_lost = lost_events;
3752 			next_size = iter->ent_size;
3753 		}
3754 	}
3755 
3756 	iter->ent_size = next_size;
3757 
3758 	if (ent_cpu)
3759 		*ent_cpu = next_cpu;
3760 
3761 	if (ent_ts)
3762 		*ent_ts = next_ts;
3763 
3764 	if (missing_events)
3765 		*missing_events = next_lost;
3766 
3767 	return next;
3768 }
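
/*
 * Example of the merge above (illustrative numbers): if CPU0 holds events
 * with timestamps {5, 9} and CPU1 holds {7}, successive calls return the
 * entries in global timestamp order: 5 (CPU0), 7 (CPU1), 9 (CPU0).
 */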
3769 
3770 #define STATIC_FMT_BUF_SIZE	128
3771 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3772 
3773 char *trace_iter_expand_format(struct trace_iterator *iter)
3774 {
3775 	char *tmp;
3776 
3777 	/*
3778 	 * iter->tr is NULL when used with tp_printk, which makes
3779 	 * this get called where it is not safe to call krealloc().
3780 	 */
3781 	if (!iter->tr || iter->fmt == static_fmt_buf)
3782 		return NULL;
3783 
3784 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3785 		       GFP_KERNEL);
3786 	if (tmp) {
3787 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3788 		iter->fmt = tmp;
3789 	}
3790 
3791 	return tmp;
3792 }
3793 
3794 /* Returns true if the string is safe to dereference from an event */
3795 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3796 			   bool star, int len)
3797 {
3798 	unsigned long addr = (unsigned long)str;
3799 	struct trace_event *trace_event;
3800 	struct trace_event_call *event;
3801 
3802 	/* Ignore strings with no length */
3803 	if (star && !len)
3804 		return true;
3805 
3806 	/* OK if part of the event data */
3807 	if ((addr >= (unsigned long)iter->ent) &&
3808 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3809 		return true;
3810 
3811 	/* OK if part of the temp seq buffer */
3812 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3813 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3814 		return true;
3815 
3816 	/* Core rodata can not be freed */
3817 	if (is_kernel_rodata(addr))
3818 		return true;
3819 
3820 	if (trace_is_tracepoint_string(str))
3821 		return true;
3822 
3823 	/*
3824 	 * Now this could be a module event, referencing core module
3825 	 * data, which is OK.
3826 	 */
3827 	if (!iter->ent)
3828 		return false;
3829 
3830 	trace_event = ftrace_find_event(iter->ent->type);
3831 	if (!trace_event)
3832 		return false;
3833 
3834 	event = container_of(trace_event, struct trace_event_call, event);
3835 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3836 		return false;
3837 
3838 	/* Would rather have rodata, but this will suffice */
3839 	if (within_module_core(addr, event->module))
3840 		return true;
3841 
3842 	return false;
3843 }
3844 
3845 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3846 
3847 static int test_can_verify_check(const char *fmt, ...)
3848 {
3849 	char buf[16];
3850 	va_list ap;
3851 	int ret;
3852 
3853 	/*
3854 	 * The verifier depends on vsnprintf() modifying the va_list passed
3855 	 * to it, which only happens when the va_list is passed by reference.
3856 	 * Some architectures (like x86_32) pass it by value, which means that
3857 	 * vsnprintf() does not modify the caller's va_list, and the verifier
3858 	 * would then need to be able to understand all the values that
3859 	 * vsnprintf() can consume. If it is passed by value, then the verifier
3860 	 * is disabled.
3861 	 */
3862 	va_start(ap, fmt);
3863 	vsnprintf(buf, 16, "%d", ap);
3864 	ret = va_arg(ap, int);
3865 	va_end(ap);
3866 
3867 	return ret;
3868 }
3869 
3870 static void test_can_verify(void)
3871 {
3872 	if (!test_can_verify_check("%d %d", 0, 1)) {
3873 		pr_info("trace event string verifier disabled\n");
3874 		static_branch_inc(&trace_no_verify);
3875 	}
3876 }
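
/*
 * How the check above works: vsnprintf() consumes the first argument (0)
 * through the "%d" format. If the architecture passes the va_list by
 * reference, the following va_arg() sees the second argument (1), the
 * function returns non-zero and the verifier stays enabled. If the va_list
 * is passed by value, va_arg() sees the first argument (0) again and the
 * verifier is switched off.
 */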
3877 
3878 /**
3879  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3880  * @iter: The iterator that holds the seq buffer and the event being printed
3881  * @fmt: The format used to print the event
3882  * @ap: The va_list holding the data to print from @fmt.
3883  *
3884  * This writes the data into the @iter->seq buffer using the data from
3885  * @fmt and @ap. If the format has a %s, then the source of the string
3886  * is examined to make sure it is safe to print, otherwise it will
3887  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3888  * pointer.
3889  */
3890 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3891 			 va_list ap)
3892 {
3893 	const char *p = fmt;
3894 	const char *str;
3895 	int i, j;
3896 
3897 	if (WARN_ON_ONCE(!fmt))
3898 		return;
3899 
3900 	if (static_branch_unlikely(&trace_no_verify))
3901 		goto print;
3902 
3903 	/* Don't bother checking when doing a ftrace_dump() */
3904 	if (iter->fmt == static_fmt_buf)
3905 		goto print;
3906 
3907 	while (*p) {
3908 		bool star = false;
3909 		int len = 0;
3910 
3911 		j = 0;
3912 
3913 		/* We only care about %s and variants */
3914 		for (i = 0; p[i]; i++) {
3915 			if (i + 1 >= iter->fmt_size) {
3916 				/*
3917 				 * If we can't expand the copy buffer,
3918 				 * just print it.
3919 				 */
3920 				if (!trace_iter_expand_format(iter))
3921 					goto print;
3922 			}
3923 
3924 			if (p[i] == '\\' && p[i+1]) {
3925 				i++;
3926 				continue;
3927 			}
3928 			if (p[i] == '%') {
3929 				/* Need to test cases like %08.*s */
3930 				for (j = 1; p[i+j]; j++) {
3931 					if (isdigit(p[i+j]) ||
3932 					    p[i+j] == '.')
3933 						continue;
3934 					if (p[i+j] == '*') {
3935 						star = true;
3936 						continue;
3937 					}
3938 					break;
3939 				}
3940 				if (p[i+j] == 's')
3941 					break;
3942 				star = false;
3943 			}
3944 			j = 0;
3945 		}
3946 		/* If no %s found then just print normally */
3947 		if (!p[i])
3948 			break;
3949 
3950 		/* Copy up to the %s, and print that */
3951 		strncpy(iter->fmt, p, i);
3952 		iter->fmt[i] = '\0';
3953 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3954 
3955 		/*
3956 		 * If iter->seq is full, the above call no longer guarantees
3957 		 * that ap is in sync with fmt processing, and further calls
3958 		 * to va_arg() can return wrong positional arguments.
3959 		 *
3960 		 * Ensure that ap is no longer used in this case.
3961 		 */
3962 		if (iter->seq.full) {
3963 			p = "";
3964 			break;
3965 		}
3966 
3967 		if (star)
3968 			len = va_arg(ap, int);
3969 
3970 		/* The ap now points to the string data of the %s */
3971 		str = va_arg(ap, const char *);
3972 
3973 		/*
3974 		 * If you hit this warning, it is likely that the
3975 		 * trace event in question used %s on a string that
3976 		 * was saved at the time of the event, but may not be
3977 		 * around when the trace is read. Use __string(),
3978 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3979 		 * instead. See samples/trace_events/trace-events-sample.h
3980 		 * for reference.
3981 		 */
3982 		if (WARN_ONCE(!trace_safe_str(iter, str, star, len),
3983 			      "fmt: '%s' current_buffer: '%s'",
3984 			      fmt, seq_buf_str(&iter->seq.seq))) {
3985 			int ret;
3986 
3987 			/* Try to safely read the string */
3988 			if (star) {
3989 				if (len + 1 > iter->fmt_size)
3990 					len = iter->fmt_size - 1;
3991 				if (len < 0)
3992 					len = 0;
3993 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3994 				iter->fmt[len] = 0;
3995 				star = false;
3996 			} else {
3997 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3998 								  iter->fmt_size);
3999 			}
4000 			if (ret < 0)
4001 				trace_seq_printf(&iter->seq, "(0x%px)", str);
4002 			else
4003 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
4004 						 str, iter->fmt);
4005 			str = "[UNSAFE-MEMORY]";
4006 			strcpy(iter->fmt, "%s");
4007 		} else {
4008 			strncpy(iter->fmt, p + i, j + 1);
4009 			iter->fmt[j+1] = '\0';
4010 		}
4011 		if (star)
4012 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
4013 		else
4014 			trace_seq_printf(&iter->seq, iter->fmt, str);
4015 
4016 		p += i + j + 1;
4017 	}
4018  print:
4019 	if (*p)
4020 		trace_seq_vprintf(&iter->seq, p, ap);
4021 }
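
/*
 * For reference, the pattern that the WARN_ONCE() above points to: copy the
 * string into the event itself with the __string()/__assign_str()/__get_str()
 * helpers so that it is still valid when the trace is read. A sketch, loosely
 * based on samples/trace_events/trace-events-sample.h (the event and field
 * names are made up):
 *
 *	TRACE_EVENT(sample_event,
 *		TP_PROTO(const char *name),
 *		TP_ARGS(name),
 *		TP_STRUCT__entry(
 *			__string(name, name)
 *		),
 *		TP_fast_assign(
 *			__assign_str(name, name);
 *		),
 *		TP_printk("name=%s", __get_str(name))
 *	);
 */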
4022 
4023 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
4024 {
4025 	const char *p, *new_fmt;
4026 	char *q;
4027 
4028 	if (WARN_ON_ONCE(!fmt))
4029 		return fmt;
4030 
4031 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
4032 		return fmt;
4033 
4034 	p = fmt;
4035 	new_fmt = q = iter->fmt;
4036 	while (*p) {
4037 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
4038 			if (!trace_iter_expand_format(iter))
4039 				return fmt;
4040 
4041 			q += iter->fmt - new_fmt;
4042 			new_fmt = iter->fmt;
4043 		}
4044 
4045 		*q++ = *p++;
4046 
4047 		/* Replace %p with %px */
4048 		if (p[-1] == '%') {
4049 			if (p[0] == '%') {
4050 				*q++ = *p++;
4051 			} else if (p[0] == 'p' && !isalnum(p[1])) {
4052 				*q++ = *p++;
4053 				*q++ = 'x';
4054 			}
4055 		}
4056 	}
4057 	*q = '\0';
4058 
4059 	return new_fmt;
4060 }
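
/*
 * Example of the rewrite above: "ptr=%p caller=%pS done=100%%" becomes
 * "ptr=%px caller=%pS done=100%%" -- only a bare %p is converted; extended
 * specifiers such as %pS and a literal %% are copied through unchanged.
 */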
4061 
4062 #define STATIC_TEMP_BUF_SIZE	128
4063 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
4064 
4065 /* Find the next real entry, without updating the iterator itself */
4066 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
4067 					  int *ent_cpu, u64 *ent_ts)
4068 {
4069 	/* __find_next_entry will reset ent_size */
4070 	int ent_size = iter->ent_size;
4071 	struct trace_entry *entry;
4072 
4073 	/*
4074 	 * If called from ftrace_dump(), then the iter->temp buffer
4075 	 * will be the static_temp_buf and not created from kmalloc.
4076 	 * If the entry size is greater than the buffer, we cannot
4077 	 * save it. Just return NULL in that case. This is only
4078 	 * used to add markers when two consecutive events' timestamps
4079 	 * have a large delta. See trace_print_lat_context().
4080 	 */
4081 	if (iter->temp == static_temp_buf &&
4082 	    STATIC_TEMP_BUF_SIZE < ent_size)
4083 		return NULL;
4084 
4085 	/*
4086 	 * __find_next_entry() may call peek_next_entry(), which may in turn
4087 	 * call ring_buffer_peek(), and that can make the contents of iter->ent
4088 	 * undefined. Need to copy iter->ent now.
4089 	 */
4090 	if (iter->ent && iter->ent != iter->temp) {
4091 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
4092 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
4093 			void *temp;
4094 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
4095 			if (!temp)
4096 				return NULL;
4097 			kfree(iter->temp);
4098 			iter->temp = temp;
4099 			iter->temp_size = iter->ent_size;
4100 		}
4101 		memcpy(iter->temp, iter->ent, iter->ent_size);
4102 		iter->ent = iter->temp;
4103 	}
4104 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
4105 	/* Put back the original ent_size */
4106 	iter->ent_size = ent_size;
4107 
4108 	return entry;
4109 }
4110 
4111 /* Find the next real entry, and increment the iterator to the next entry */
4112 void *trace_find_next_entry_inc(struct trace_iterator *iter)
4113 {
4114 	iter->ent = __find_next_entry(iter, &iter->cpu,
4115 				      &iter->lost_events, &iter->ts);
4116 
4117 	if (iter->ent)
4118 		trace_iterator_increment(iter);
4119 
4120 	return iter->ent ? iter : NULL;
4121 }
4122 
4123 static void trace_consume(struct trace_iterator *iter)
4124 {
4125 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
4126 			    &iter->lost_events);
4127 }
4128 
4129 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
4130 {
4131 	struct trace_iterator *iter = m->private;
4132 	int i = (int)*pos;
4133 	void *ent;
4134 
4135 	WARN_ON_ONCE(iter->leftover);
4136 
4137 	(*pos)++;
4138 
4139 	/* can't go backwards */
4140 	if (iter->idx > i)
4141 		return NULL;
4142 
4143 	if (iter->idx < 0)
4144 		ent = trace_find_next_entry_inc(iter);
4145 	else
4146 		ent = iter;
4147 
4148 	while (ent && iter->idx < i)
4149 		ent = trace_find_next_entry_inc(iter);
4150 
4151 	iter->pos = *pos;
4152 
4153 	return ent;
4154 }
4155 
4156 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4157 {
4158 	struct ring_buffer_iter *buf_iter;
4159 	unsigned long entries = 0;
4160 	u64 ts;
4161 
4162 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4163 
4164 	buf_iter = trace_buffer_iter(iter, cpu);
4165 	if (!buf_iter)
4166 		return;
4167 
4168 	ring_buffer_iter_reset(buf_iter);
4169 
4170 	/*
4171 	 * We could have the case with the max latency tracers
4172 	 * that a reset never took place on a cpu. This is evident
4173 	 * by the timestamp being before the start of the buffer.
4174 	 */
4175 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4176 		if (ts >= iter->array_buffer->time_start)
4177 			break;
4178 		entries++;
4179 		ring_buffer_iter_advance(buf_iter);
4180 	}
4181 
4182 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4183 }
4184 
4185 /*
4186  * The current tracer is copied to avoid the need for global locking
4187  * all around.
4188  */
4189 static void *s_start(struct seq_file *m, loff_t *pos)
4190 {
4191 	struct trace_iterator *iter = m->private;
4192 	struct trace_array *tr = iter->tr;
4193 	int cpu_file = iter->cpu_file;
4194 	void *p = NULL;
4195 	loff_t l = 0;
4196 	int cpu;
4197 
4198 	mutex_lock(&trace_types_lock);
4199 	if (unlikely(tr->current_trace != iter->trace)) {
4200 		/* Close iter->trace before switching to the new current tracer */
4201 		if (iter->trace->close)
4202 			iter->trace->close(iter);
4203 		iter->trace = tr->current_trace;
4204 		/* Reopen the new current tracer */
4205 		if (iter->trace->open)
4206 			iter->trace->open(iter);
4207 	}
4208 	mutex_unlock(&trace_types_lock);
4209 
4210 #ifdef CONFIG_TRACER_MAX_TRACE
4211 	if (iter->snapshot && iter->trace->use_max_tr)
4212 		return ERR_PTR(-EBUSY);
4213 #endif
4214 
4215 	if (*pos != iter->pos) {
4216 		iter->ent = NULL;
4217 		iter->cpu = 0;
4218 		iter->idx = -1;
4219 
4220 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4221 			for_each_tracing_cpu(cpu)
4222 				tracing_iter_reset(iter, cpu);
4223 		} else
4224 			tracing_iter_reset(iter, cpu_file);
4225 
4226 		iter->leftover = 0;
4227 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4228 			;
4229 
4230 	} else {
4231 		/*
4232 		 * If we overflowed the seq_file before, then we want
4233 		 * to just reuse the trace_seq buffer again.
4234 		 */
4235 		if (iter->leftover)
4236 			p = iter;
4237 		else {
4238 			l = *pos - 1;
4239 			p = s_next(m, p, &l);
4240 		}
4241 	}
4242 
4243 	trace_event_read_lock();
4244 	trace_access_lock(cpu_file);
4245 	return p;
4246 }
4247 
4248 static void s_stop(struct seq_file *m, void *p)
4249 {
4250 	struct trace_iterator *iter = m->private;
4251 
4252 #ifdef CONFIG_TRACER_MAX_TRACE
4253 	if (iter->snapshot && iter->trace->use_max_tr)
4254 		return;
4255 #endif
4256 
4257 	trace_access_unlock(iter->cpu_file);
4258 	trace_event_read_unlock();
4259 }
4260 
4261 static void
4262 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4263 		      unsigned long *entries, int cpu)
4264 {
4265 	unsigned long count;
4266 
4267 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4268 	/*
4269 	 * If this buffer has skipped entries, then we hold all
4270 	 * entries for the trace and we need to ignore the
4271 	 * ones before the buffer's start timestamp.
4272 	 */
4273 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4274 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4275 		/* total is the same as the entries */
4276 		*total = count;
4277 	} else
4278 		*total = count +
4279 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4280 	*entries = count;
4281 }
4282 
4283 static void
4284 get_total_entries(struct array_buffer *buf,
4285 		  unsigned long *total, unsigned long *entries)
4286 {
4287 	unsigned long t, e;
4288 	int cpu;
4289 
4290 	*total = 0;
4291 	*entries = 0;
4292 
4293 	for_each_tracing_cpu(cpu) {
4294 		get_total_entries_cpu(buf, &t, &e, cpu);
4295 		*total += t;
4296 		*entries += e;
4297 	}
4298 }
4299 
4300 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4301 {
4302 	unsigned long total, entries;
4303 
4304 	if (!tr)
4305 		tr = &global_trace;
4306 
4307 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4308 
4309 	return entries;
4310 }
4311 
4312 unsigned long trace_total_entries(struct trace_array *tr)
4313 {
4314 	unsigned long total, entries;
4315 
4316 	if (!tr)
4317 		tr = &global_trace;
4318 
4319 	get_total_entries(&tr->array_buffer, &total, &entries);
4320 
4321 	return entries;
4322 }
4323 
4324 static void print_lat_help_header(struct seq_file *m)
4325 {
4326 	seq_puts(m, "#                    _------=> CPU#            \n"
4327 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4328 		    "#                  | / _----=> need-resched    \n"
4329 		    "#                  || / _---=> hardirq/softirq \n"
4330 		    "#                  ||| / _--=> preempt-depth   \n"
4331 		    "#                  |||| / _-=> migrate-disable \n"
4332 		    "#                  ||||| /     delay           \n"
4333 		    "#  cmd     pid     |||||| time  |   caller     \n"
4334 		    "#     \\   /        ||||||  \\    |    /       \n");
4335 }
4336 
4337 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4338 {
4339 	unsigned long total;
4340 	unsigned long entries;
4341 
4342 	get_total_entries(buf, &total, &entries);
4343 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4344 		   entries, total, num_online_cpus());
4345 	seq_puts(m, "#\n");
4346 }
4347 
4348 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4349 				   unsigned int flags)
4350 {
4351 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4352 
4353 	print_event_info(buf, m);
4354 
4355 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4356 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4357 }
4358 
4359 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4360 				       unsigned int flags)
4361 {
4362 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4363 	static const char space[] = "            ";
4364 	int prec = tgid ? 12 : 2;
4365 
4366 	print_event_info(buf, m);
4367 
4368 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4369 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4370 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4371 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4372 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4373 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4374 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4375 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4376 }
4377 
4378 void
4379 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4380 {
4381 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4382 	struct array_buffer *buf = iter->array_buffer;
4383 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4384 	struct tracer *type = iter->trace;
4385 	unsigned long entries;
4386 	unsigned long total;
4387 	const char *name = type->name;
4388 
4389 	get_total_entries(buf, &total, &entries);
4390 
4391 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4392 		   name, UTS_RELEASE);
4393 	seq_puts(m, "# -----------------------------------"
4394 		 "---------------------------------\n");
4395 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4396 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4397 		   nsecs_to_usecs(data->saved_latency),
4398 		   entries,
4399 		   total,
4400 		   buf->cpu,
4401 		   preempt_model_none()      ? "server" :
4402 		   preempt_model_voluntary() ? "desktop" :
4403 		   preempt_model_full()      ? "preempt" :
4404 		   preempt_model_rt()        ? "preempt_rt" :
4405 		   "unknown",
4406 		   /* These are reserved for later use */
4407 		   0, 0, 0, 0);
4408 #ifdef CONFIG_SMP
4409 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4410 #else
4411 	seq_puts(m, ")\n");
4412 #endif
4413 	seq_puts(m, "#    -----------------\n");
4414 	seq_printf(m, "#    | task: %.16s-%d "
4415 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4416 		   data->comm, data->pid,
4417 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4418 		   data->policy, data->rt_priority);
4419 	seq_puts(m, "#    -----------------\n");
4420 
4421 	if (data->critical_start) {
4422 		seq_puts(m, "#  => started at: ");
4423 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4424 		trace_print_seq(m, &iter->seq);
4425 		seq_puts(m, "\n#  => ended at:   ");
4426 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4427 		trace_print_seq(m, &iter->seq);
4428 		seq_puts(m, "\n#\n");
4429 	}
4430 
4431 	seq_puts(m, "#\n");
4432 }
4433 
4434 static void test_cpu_buff_start(struct trace_iterator *iter)
4435 {
4436 	struct trace_seq *s = &iter->seq;
4437 	struct trace_array *tr = iter->tr;
4438 
4439 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4440 		return;
4441 
4442 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4443 		return;
4444 
4445 	if (cpumask_available(iter->started) &&
4446 	    cpumask_test_cpu(iter->cpu, iter->started))
4447 		return;
4448 
4449 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4450 		return;
4451 
4452 	if (cpumask_available(iter->started))
4453 		cpumask_set_cpu(iter->cpu, iter->started);
4454 
4455 	/* Don't print started cpu buffer for the first entry of the trace */
4456 	if (iter->idx > 1)
4457 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4458 				iter->cpu);
4459 }
4460 
4461 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4462 {
4463 	struct trace_array *tr = iter->tr;
4464 	struct trace_seq *s = &iter->seq;
4465 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4466 	struct trace_entry *entry;
4467 	struct trace_event *event;
4468 
4469 	entry = iter->ent;
4470 
4471 	test_cpu_buff_start(iter);
4472 
4473 	event = ftrace_find_event(entry->type);
4474 
4475 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4476 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4477 			trace_print_lat_context(iter);
4478 		else
4479 			trace_print_context(iter);
4480 	}
4481 
4482 	if (trace_seq_has_overflowed(s))
4483 		return TRACE_TYPE_PARTIAL_LINE;
4484 
4485 	if (event) {
4486 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4487 			return print_event_fields(iter, event);
4488 		return event->funcs->trace(iter, sym_flags, event);
4489 	}
4490 
4491 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4492 
4493 	return trace_handle_return(s);
4494 }
4495 
4496 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4497 {
4498 	struct trace_array *tr = iter->tr;
4499 	struct trace_seq *s = &iter->seq;
4500 	struct trace_entry *entry;
4501 	struct trace_event *event;
4502 
4503 	entry = iter->ent;
4504 
4505 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4506 		trace_seq_printf(s, "%d %d %llu ",
4507 				 entry->pid, iter->cpu, iter->ts);
4508 
4509 	if (trace_seq_has_overflowed(s))
4510 		return TRACE_TYPE_PARTIAL_LINE;
4511 
4512 	event = ftrace_find_event(entry->type);
4513 	if (event)
4514 		return event->funcs->raw(iter, 0, event);
4515 
4516 	trace_seq_printf(s, "%d ?\n", entry->type);
4517 
4518 	return trace_handle_return(s);
4519 }
4520 
4521 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4522 {
4523 	struct trace_array *tr = iter->tr;
4524 	struct trace_seq *s = &iter->seq;
4525 	unsigned char newline = '\n';
4526 	struct trace_entry *entry;
4527 	struct trace_event *event;
4528 
4529 	entry = iter->ent;
4530 
4531 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4532 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4533 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4534 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4535 		if (trace_seq_has_overflowed(s))
4536 			return TRACE_TYPE_PARTIAL_LINE;
4537 	}
4538 
4539 	event = ftrace_find_event(entry->type);
4540 	if (event) {
4541 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4542 		if (ret != TRACE_TYPE_HANDLED)
4543 			return ret;
4544 	}
4545 
4546 	SEQ_PUT_FIELD(s, newline);
4547 
4548 	return trace_handle_return(s);
4549 }
4550 
4551 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4552 {
4553 	struct trace_array *tr = iter->tr;
4554 	struct trace_seq *s = &iter->seq;
4555 	struct trace_entry *entry;
4556 	struct trace_event *event;
4557 
4558 	entry = iter->ent;
4559 
4560 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4561 		SEQ_PUT_FIELD(s, entry->pid);
4562 		SEQ_PUT_FIELD(s, iter->cpu);
4563 		SEQ_PUT_FIELD(s, iter->ts);
4564 		if (trace_seq_has_overflowed(s))
4565 			return TRACE_TYPE_PARTIAL_LINE;
4566 	}
4567 
4568 	event = ftrace_find_event(entry->type);
4569 	return event ? event->funcs->binary(iter, 0, event) :
4570 		TRACE_TYPE_HANDLED;
4571 }
4572 
4573 int trace_empty(struct trace_iterator *iter)
4574 {
4575 	struct ring_buffer_iter *buf_iter;
4576 	int cpu;
4577 
4578 	/* If we are looking at one CPU buffer, only check that one */
4579 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4580 		cpu = iter->cpu_file;
4581 		buf_iter = trace_buffer_iter(iter, cpu);
4582 		if (buf_iter) {
4583 			if (!ring_buffer_iter_empty(buf_iter))
4584 				return 0;
4585 		} else {
4586 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4587 				return 0;
4588 		}
4589 		return 1;
4590 	}
4591 
4592 	for_each_tracing_cpu(cpu) {
4593 		buf_iter = trace_buffer_iter(iter, cpu);
4594 		if (buf_iter) {
4595 			if (!ring_buffer_iter_empty(buf_iter))
4596 				return 0;
4597 		} else {
4598 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4599 				return 0;
4600 		}
4601 	}
4602 
4603 	return 1;
4604 }
4605 
4606 /*  Called with trace_event_read_lock() held. */
4607 enum print_line_t print_trace_line(struct trace_iterator *iter)
4608 {
4609 	struct trace_array *tr = iter->tr;
4610 	unsigned long trace_flags = tr->trace_flags;
4611 	enum print_line_t ret;
4612 
4613 	if (iter->lost_events) {
4614 		if (iter->lost_events == (unsigned long)-1)
4615 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4616 					 iter->cpu);
4617 		else
4618 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4619 					 iter->cpu, iter->lost_events);
4620 		if (trace_seq_has_overflowed(&iter->seq))
4621 			return TRACE_TYPE_PARTIAL_LINE;
4622 	}
4623 
4624 	if (iter->trace && iter->trace->print_line) {
4625 		ret = iter->trace->print_line(iter);
4626 		if (ret != TRACE_TYPE_UNHANDLED)
4627 			return ret;
4628 	}
4629 
4630 	if (iter->ent->type == TRACE_BPUTS &&
4631 			trace_flags & TRACE_ITER_PRINTK &&
4632 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4633 		return trace_print_bputs_msg_only(iter);
4634 
4635 	if (iter->ent->type == TRACE_BPRINT &&
4636 			trace_flags & TRACE_ITER_PRINTK &&
4637 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4638 		return trace_print_bprintk_msg_only(iter);
4639 
4640 	if (iter->ent->type == TRACE_PRINT &&
4641 			trace_flags & TRACE_ITER_PRINTK &&
4642 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4643 		return trace_print_printk_msg_only(iter);
4644 
4645 	if (trace_flags & TRACE_ITER_BIN)
4646 		return print_bin_fmt(iter);
4647 
4648 	if (trace_flags & TRACE_ITER_HEX)
4649 		return print_hex_fmt(iter);
4650 
4651 	if (trace_flags & TRACE_ITER_RAW)
4652 		return print_raw_fmt(iter);
4653 
4654 	return print_trace_fmt(iter);
4655 }
4656 
4657 void trace_latency_header(struct seq_file *m)
4658 {
4659 	struct trace_iterator *iter = m->private;
4660 	struct trace_array *tr = iter->tr;
4661 
4662 	/* print nothing if the buffers are empty */
4663 	if (trace_empty(iter))
4664 		return;
4665 
4666 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4667 		print_trace_header(m, iter);
4668 
4669 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4670 		print_lat_help_header(m);
4671 }
4672 
4673 void trace_default_header(struct seq_file *m)
4674 {
4675 	struct trace_iterator *iter = m->private;
4676 	struct trace_array *tr = iter->tr;
4677 	unsigned long trace_flags = tr->trace_flags;
4678 
4679 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4680 		return;
4681 
4682 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4683 		/* print nothing if the buffers are empty */
4684 		if (trace_empty(iter))
4685 			return;
4686 		print_trace_header(m, iter);
4687 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4688 			print_lat_help_header(m);
4689 	} else {
4690 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4691 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4692 				print_func_help_header_irq(iter->array_buffer,
4693 							   m, trace_flags);
4694 			else
4695 				print_func_help_header(iter->array_buffer, m,
4696 						       trace_flags);
4697 		}
4698 	}
4699 }
4700 
4701 static void test_ftrace_alive(struct seq_file *m)
4702 {
4703 	if (!ftrace_is_dead())
4704 		return;
4705 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4706 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4707 }
4708 
4709 #ifdef CONFIG_TRACER_MAX_TRACE
4710 static void show_snapshot_main_help(struct seq_file *m)
4711 {
4712 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4713 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4714 		    "#                      Takes a snapshot of the main buffer.\n"
4715 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4716 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4717 		    "#                       is not a '0' or '1')\n");
4718 }
4719 
4720 static void show_snapshot_percpu_help(struct seq_file *m)
4721 {
4722 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4723 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4724 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4725 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4726 #else
4727 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4728 		    "#                     Must use main snapshot file to allocate.\n");
4729 #endif
4730 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4731 		    "#                      (Doesn't have to be '2'; works with any number that\n"
4732 		    "#                       is not a '0' or '1')\n");
4733 }
4734 
4735 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4736 {
4737 	if (iter->tr->allocated_snapshot)
4738 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4739 	else
4740 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4741 
4742 	seq_puts(m, "# Snapshot commands:\n");
4743 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4744 		show_snapshot_main_help(m);
4745 	else
4746 		show_snapshot_percpu_help(m);
4747 }
4748 #else
4749 /* Should never be called */
4750 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4751 #endif
4752 
4753 static int s_show(struct seq_file *m, void *v)
4754 {
4755 	struct trace_iterator *iter = v;
4756 	int ret;
4757 
4758 	if (iter->ent == NULL) {
4759 		if (iter->tr) {
4760 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4761 			seq_puts(m, "#\n");
4762 			test_ftrace_alive(m);
4763 		}
4764 		if (iter->snapshot && trace_empty(iter))
4765 			print_snapshot_help(m, iter);
4766 		else if (iter->trace && iter->trace->print_header)
4767 			iter->trace->print_header(m);
4768 		else
4769 			trace_default_header(m);
4770 
4771 	} else if (iter->leftover) {
4772 		/*
4773 		 * If we filled the seq_file buffer earlier, we
4774 		 * want to just show it now.
4775 		 */
4776 		ret = trace_print_seq(m, &iter->seq);
4777 
4778 		/* ret should this time be zero, but you never know */
4779 		iter->leftover = ret;
4780 
4781 	} else {
4782 		ret = print_trace_line(iter);
4783 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4784 			iter->seq.full = 0;
4785 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4786 		}
4787 		ret = trace_print_seq(m, &iter->seq);
4788 		/*
4789 		 * If we overflow the seq_file buffer, then it will
4790 		 * ask us for this data again at start up.
4791 		 * Use that instead.
4792 		 *  ret is 0 if seq_file write succeeded.
4793 		 *        -1 otherwise.
4794 		 */
4795 		iter->leftover = ret;
4796 	}
4797 
4798 	return 0;
4799 }
4800 
4801 /*
4802  * Should be used after trace_array_get(); trace_types_lock
4803  * ensures that i_cdev was already initialized.
4804  */
4805 static inline int tracing_get_cpu(struct inode *inode)
4806 {
4807 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4808 		return (long)inode->i_cdev - 1;
4809 	return RING_BUFFER_ALL_CPUS;
4810 }
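
/*
 * The i_cdev field is used as a small encoding: trace_create_cpu_file()
 * stores (cpu + 1) in it, so a NULL i_cdev (the default) means "all CPUs"
 * and any other value decodes back to the CPU number above.
 */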
4811 
4812 static const struct seq_operations tracer_seq_ops = {
4813 	.start		= s_start,
4814 	.next		= s_next,
4815 	.stop		= s_stop,
4816 	.show		= s_show,
4817 };
4818 
4819 /*
4820  * Note, as iter itself can be allocated and freed in different
4821  * ways, this function is only used to free its content, and not
4822  * the iterator itself. The only requirement for all the allocations
4823  * is that they must zero all fields (kzalloc), as freeing works with
4824  * either allocated content or NULL.
4825  */
4826 static void free_trace_iter_content(struct trace_iterator *iter)
4827 {
4828 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4829 	if (iter->fmt != static_fmt_buf)
4830 		kfree(iter->fmt);
4831 
4832 	kfree(iter->temp);
4833 	kfree(iter->buffer_iter);
4834 	mutex_destroy(&iter->mutex);
4835 	free_cpumask_var(iter->started);
4836 }
4837 
4838 static struct trace_iterator *
4839 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4840 {
4841 	struct trace_array *tr = inode->i_private;
4842 	struct trace_iterator *iter;
4843 	int cpu;
4844 
4845 	if (tracing_disabled)
4846 		return ERR_PTR(-ENODEV);
4847 
4848 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4849 	if (!iter)
4850 		return ERR_PTR(-ENOMEM);
4851 
4852 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4853 				    GFP_KERNEL);
4854 	if (!iter->buffer_iter)
4855 		goto release;
4856 
4857 	/*
4858 	 * trace_find_next_entry() may need to save off iter->ent.
4859 	 * events are less than 128 bytes, allocate a buffer of that size.
4860 	 * events are less than 128, allocate a buffer of that size.
4861 	 * If one is greater, then trace_find_next_entry() will
4862 	 * allocate a new buffer to adjust for the bigger iter->ent.
4863 	 * It's not critical if it fails to get allocated here.
4864 	 */
4865 	iter->temp = kmalloc(128, GFP_KERNEL);
4866 	if (iter->temp)
4867 		iter->temp_size = 128;
4868 
4869 	/*
4870 	 * trace_event_printf() may need to modify the given format
4871 	 * string to replace %p with %px so that it shows the real address
4872 	 * instead of a hash value. However, that is only needed for event
4873 	 * tracing; other tracers may not need it. Defer the allocation
4874 	 * until it is needed.
4875 	 */
4876 	iter->fmt = NULL;
4877 	iter->fmt_size = 0;
4878 
4879 	mutex_lock(&trace_types_lock);
4880 	iter->trace = tr->current_trace;
4881 
4882 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4883 		goto fail;
4884 
4885 	iter->tr = tr;
4886 
4887 #ifdef CONFIG_TRACER_MAX_TRACE
4888 	/* Currently only the top directory has a snapshot */
4889 	if (tr->current_trace->print_max || snapshot)
4890 		iter->array_buffer = &tr->max_buffer;
4891 	else
4892 #endif
4893 		iter->array_buffer = &tr->array_buffer;
4894 	iter->snapshot = snapshot;
4895 	iter->pos = -1;
4896 	iter->cpu_file = tracing_get_cpu(inode);
4897 	mutex_init(&iter->mutex);
4898 
4899 	/* Notify the tracer early; before we stop tracing. */
4900 	if (iter->trace->open)
4901 		iter->trace->open(iter);
4902 
4903 	/* Annotate start of buffers if we had overruns */
4904 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4905 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4906 
4907 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4908 	if (trace_clocks[tr->clock_id].in_ns)
4909 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4910 
4911 	/*
4912 	 * If pause-on-trace is enabled, then stop the trace while
4913 	 * dumping, unless this is the "snapshot" file
4914 	 */
4915 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4916 		tracing_stop_tr(tr);
4917 
4918 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4919 		for_each_tracing_cpu(cpu) {
4920 			iter->buffer_iter[cpu] =
4921 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4922 							 cpu, GFP_KERNEL);
4923 		}
4924 		ring_buffer_read_prepare_sync();
4925 		for_each_tracing_cpu(cpu) {
4926 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4927 			tracing_iter_reset(iter, cpu);
4928 		}
4929 	} else {
4930 		cpu = iter->cpu_file;
4931 		iter->buffer_iter[cpu] =
4932 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4933 						 cpu, GFP_KERNEL);
4934 		ring_buffer_read_prepare_sync();
4935 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4936 		tracing_iter_reset(iter, cpu);
4937 	}
4938 
4939 	mutex_unlock(&trace_types_lock);
4940 
4941 	return iter;
4942 
4943  fail:
4944 	mutex_unlock(&trace_types_lock);
4945 	free_trace_iter_content(iter);
4946 release:
4947 	seq_release_private(inode, file);
4948 	return ERR_PTR(-ENOMEM);
4949 }
4950 
4951 int tracing_open_generic(struct inode *inode, struct file *filp)
4952 {
4953 	int ret;
4954 
4955 	ret = tracing_check_open_get_tr(NULL);
4956 	if (ret)
4957 		return ret;
4958 
4959 	filp->private_data = inode->i_private;
4960 	return 0;
4961 }
4962 
4963 bool tracing_is_disabled(void)
4964 {
4965 	return (tracing_disabled) ? true: false;
4966 	return tracing_disabled ? true : false;
4967 
4968 /*
4969  * Open and update trace_array ref count.
4970  * Must have the current trace_array passed to it.
4971  */
4972 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4973 {
4974 	struct trace_array *tr = inode->i_private;
4975 	int ret;
4976 
4977 	ret = tracing_check_open_get_tr(tr);
4978 	if (ret)
4979 		return ret;
4980 
4981 	filp->private_data = inode->i_private;
4982 
4983 	return 0;
4984 }
4985 
4986 /*
4987  * The private pointer of the inode is the trace_event_file.
4988  * Update the tr ref count associated to it.
4989  */
4990 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4991 {
4992 	struct trace_event_file *file = inode->i_private;
4993 	int ret;
4994 
4995 	ret = tracing_check_open_get_tr(file->tr);
4996 	if (ret)
4997 		return ret;
4998 
4999 	mutex_lock(&event_mutex);
5000 
5001 	/* Fail if the file is marked for removal */
5002 	if (file->flags & EVENT_FILE_FL_FREED) {
5003 		trace_array_put(file->tr);
5004 		ret = -ENODEV;
5005 	} else {
5006 		event_file_get(file);
5007 	}
5008 
5009 	mutex_unlock(&event_mutex);
5010 	if (ret)
5011 		return ret;
5012 
5013 	filp->private_data = inode->i_private;
5014 
5015 	return 0;
5016 }
5017 
5018 int tracing_release_file_tr(struct inode *inode, struct file *filp)
5019 {
5020 	struct trace_event_file *file = inode->i_private;
5021 
5022 	trace_array_put(file->tr);
5023 	event_file_put(file);
5024 
5025 	return 0;
5026 }
5027 
5028 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
5029 {
5030 	tracing_release_file_tr(inode, filp);
5031 	return single_release(inode, filp);
5032 }
5033 
5034 static int tracing_mark_open(struct inode *inode, struct file *filp)
5035 {
5036 	stream_open(inode, filp);
5037 	return tracing_open_generic_tr(inode, filp);
5038 }
5039 
5040 static int tracing_release(struct inode *inode, struct file *file)
5041 {
5042 	struct trace_array *tr = inode->i_private;
5043 	struct seq_file *m = file->private_data;
5044 	struct trace_iterator *iter;
5045 	int cpu;
5046 
5047 	if (!(file->f_mode & FMODE_READ)) {
5048 		trace_array_put(tr);
5049 		return 0;
5050 	}
5051 
5052 	/* Writes do not use seq_file */
5053 	iter = m->private;
5054 	mutex_lock(&trace_types_lock);
5055 
5056 	for_each_tracing_cpu(cpu) {
5057 		if (iter->buffer_iter[cpu])
5058 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
5059 	}
5060 
5061 	if (iter->trace && iter->trace->close)
5062 		iter->trace->close(iter);
5063 
5064 	if (!iter->snapshot && tr->stop_count)
5065 		/* reenable tracing if it was previously enabled */
5066 		tracing_start_tr(tr);
5067 
5068 	__trace_array_put(tr);
5069 
5070 	mutex_unlock(&trace_types_lock);
5071 
5072 	free_trace_iter_content(iter);
5073 	seq_release_private(inode, file);
5074 
5075 	return 0;
5076 }
5077 
5078 int tracing_release_generic_tr(struct inode *inode, struct file *file)
5079 {
5080 	struct trace_array *tr = inode->i_private;
5081 
5082 	trace_array_put(tr);
5083 	return 0;
5084 }
5085 
5086 static int tracing_single_release_tr(struct inode *inode, struct file *file)
5087 {
5088 	struct trace_array *tr = inode->i_private;
5089 
5090 	trace_array_put(tr);
5091 
5092 	return single_release(inode, file);
5093 }
5094 
5095 static int tracing_open(struct inode *inode, struct file *file)
5096 {
5097 	struct trace_array *tr = inode->i_private;
5098 	struct trace_iterator *iter;
5099 	int ret;
5100 
5101 	ret = tracing_check_open_get_tr(tr);
5102 	if (ret)
5103 		return ret;
5104 
5105 	/* If this file was open for write, then erase contents */
5106 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
5107 		int cpu = tracing_get_cpu(inode);
5108 		struct array_buffer *trace_buf = &tr->array_buffer;
5109 
5110 #ifdef CONFIG_TRACER_MAX_TRACE
5111 		if (tr->current_trace->print_max)
5112 			trace_buf = &tr->max_buffer;
5113 #endif
5114 
5115 		if (cpu == RING_BUFFER_ALL_CPUS)
5116 			tracing_reset_online_cpus(trace_buf);
5117 		else
5118 			tracing_reset_cpu(trace_buf, cpu);
5119 	}
5120 
5121 	if (file->f_mode & FMODE_READ) {
5122 		iter = __tracing_open(inode, file, false);
5123 		if (IS_ERR(iter))
5124 			ret = PTR_ERR(iter);
5125 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
5126 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
5127 	}
5128 
5129 	if (ret < 0)
5130 		trace_array_put(tr);
5131 
5132 	return ret;
5133 }
5134 
5135 /*
5136  * Some tracers are not suitable for instance buffers.
5137  * A tracer is always available for the global array (toplevel)
5138  * or if it explicitly states that it is.
5139  */
5140 static bool
5141 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5142 {
5143 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5144 }
5145 
5146 /* Find the next tracer that this trace array may use */
5147 static struct tracer *
5148 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5149 {
5150 	while (t && !trace_ok_for_array(t, tr))
5151 		t = t->next;
5152 
5153 	return t;
5154 }
5155 
5156 static void *
5157 t_next(struct seq_file *m, void *v, loff_t *pos)
5158 {
5159 	struct trace_array *tr = m->private;
5160 	struct tracer *t = v;
5161 
5162 	(*pos)++;
5163 
5164 	if (t)
5165 		t = get_tracer_for_array(tr, t->next);
5166 
5167 	return t;
5168 }
5169 
5170 static void *t_start(struct seq_file *m, loff_t *pos)
5171 {
5172 	struct trace_array *tr = m->private;
5173 	struct tracer *t;
5174 	loff_t l = 0;
5175 
5176 	mutex_lock(&trace_types_lock);
5177 
5178 	t = get_tracer_for_array(tr, trace_types);
5179 	for (; t && l < *pos; t = t_next(m, t, &l))
5180 			;
5181 
5182 	return t;
5183 }
5184 
5185 static void t_stop(struct seq_file *m, void *p)
5186 {
5187 	mutex_unlock(&trace_types_lock);
5188 }
5189 
5190 static int t_show(struct seq_file *m, void *v)
5191 {
5192 	struct tracer *t = v;
5193 
5194 	if (!t)
5195 		return 0;
5196 
5197 	seq_puts(m, t->name);
5198 	if (t->next)
5199 		seq_putc(m, ' ');
5200 	else
5201 		seq_putc(m, '\n');
5202 
5203 	return 0;
5204 }
5205 
5206 static const struct seq_operations show_traces_seq_ops = {
5207 	.start		= t_start,
5208 	.next		= t_next,
5209 	.stop		= t_stop,
5210 	.show		= t_show,
5211 };
5212 
5213 static int show_traces_open(struct inode *inode, struct file *file)
5214 {
5215 	struct trace_array *tr = inode->i_private;
5216 	struct seq_file *m;
5217 	int ret;
5218 
5219 	ret = tracing_check_open_get_tr(tr);
5220 	if (ret)
5221 		return ret;
5222 
5223 	ret = seq_open(file, &show_traces_seq_ops);
5224 	if (ret) {
5225 		trace_array_put(tr);
5226 		return ret;
5227 	}
5228 
5229 	m = file->private_data;
5230 	m->private = tr;
5231 
5232 	return 0;
5233 }
5234 
5235 static int show_traces_release(struct inode *inode, struct file *file)
5236 {
5237 	struct trace_array *tr = inode->i_private;
5238 
5239 	trace_array_put(tr);
5240 	return seq_release(inode, file);
5241 }
5242 
5243 static ssize_t
5244 tracing_write_stub(struct file *filp, const char __user *ubuf,
5245 		   size_t count, loff_t *ppos)
5246 {
5247 	return count;
5248 }
5249 
5250 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5251 {
5252 	int ret;
5253 
5254 	if (file->f_mode & FMODE_READ)
5255 		ret = seq_lseek(file, offset, whence);
5256 	else
5257 		file->f_pos = ret = 0;
5258 
5259 	return ret;
5260 }
5261 
5262 static const struct file_operations tracing_fops = {
5263 	.open		= tracing_open,
5264 	.read		= seq_read,
5265 	.read_iter	= seq_read_iter,
5266 	.splice_read	= copy_splice_read,
5267 	.write		= tracing_write_stub,
5268 	.llseek		= tracing_lseek,
5269 	.release	= tracing_release,
5270 };
5271 
5272 static const struct file_operations show_traces_fops = {
5273 	.open		= show_traces_open,
5274 	.read		= seq_read,
5275 	.llseek		= seq_lseek,
5276 	.release	= show_traces_release,
5277 };
5278 
5279 static ssize_t
5280 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5281 		     size_t count, loff_t *ppos)
5282 {
5283 	struct trace_array *tr = file_inode(filp)->i_private;
5284 	char *mask_str;
5285 	int len;
5286 
5287 	len = snprintf(NULL, 0, "%*pb\n",
5288 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5289 	mask_str = kmalloc(len, GFP_KERNEL);
5290 	if (!mask_str)
5291 		return -ENOMEM;
5292 
5293 	len = snprintf(mask_str, len, "%*pb\n",
5294 		       cpumask_pr_args(tr->tracing_cpumask));
5295 	if (len >= count) {
5296 		count = -EINVAL;
5297 		goto out_err;
5298 	}
5299 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5300 
5301 out_err:
5302 	kfree(mask_str);
5303 
5304 	return count;
5305 }
5306 
5307 int tracing_set_cpumask(struct trace_array *tr,
5308 			cpumask_var_t tracing_cpumask_new)
5309 {
5310 	int cpu;
5311 
5312 	if (!tr)
5313 		return -EINVAL;
5314 
5315 	local_irq_disable();
5316 	arch_spin_lock(&tr->max_lock);
5317 	for_each_tracing_cpu(cpu) {
5318 		/*
5319 		 * Increase/decrease the disabled counter if we are
5320 		 * about to flip a bit in the cpumask:
5321 		 */
5322 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5323 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5324 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5325 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5326 #ifdef CONFIG_TRACER_MAX_TRACE
5327 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5328 #endif
5329 		}
5330 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5331 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5332 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5333 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5334 #ifdef CONFIG_TRACER_MAX_TRACE
5335 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5336 #endif
5337 		}
5338 	}
5339 	arch_spin_unlock(&tr->max_lock);
5340 	local_irq_enable();
5341 
5342 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5343 
5344 	return 0;
5345 }
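
/*
 * Illustrative in-kernel use (a sketch; the chosen CPU is arbitrary):
 *
 *	cpumask_var_t new_mask;
 *
 *	if (!zalloc_cpumask_var(&new_mask, GFP_KERNEL))
 *		return -ENOMEM;
 *	cpumask_set_cpu(0, new_mask);
 *	tracing_set_cpumask(tr, new_mask);
 *	free_cpumask_var(new_mask);
 *
 * tracing_set_cpumask() copies the mask (see the cpumask_copy() above), so
 * the caller keeps ownership of new_mask and must free it.
 */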
5346 
5347 static ssize_t
5348 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5349 		      size_t count, loff_t *ppos)
5350 {
5351 	struct trace_array *tr = file_inode(filp)->i_private;
5352 	cpumask_var_t tracing_cpumask_new;
5353 	int err;
5354 
5355 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5356 		return -ENOMEM;
5357 
5358 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5359 	if (err)
5360 		goto err_free;
5361 
5362 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5363 	if (err)
5364 		goto err_free;
5365 
5366 	free_cpumask_var(tracing_cpumask_new);
5367 
5368 	return count;
5369 
5370 err_free:
5371 	free_cpumask_var(tracing_cpumask_new);
5372 
5373 	return err;
5374 }
5375 
5376 static const struct file_operations tracing_cpumask_fops = {
5377 	.open		= tracing_open_generic_tr,
5378 	.read		= tracing_cpumask_read,
5379 	.write		= tracing_cpumask_write,
5380 	.release	= tracing_release_generic_tr,
5381 	.llseek		= generic_file_llseek,
5382 };
5383 
5384 static int tracing_trace_options_show(struct seq_file *m, void *v)
5385 {
5386 	struct tracer_opt *trace_opts;
5387 	struct trace_array *tr = m->private;
5388 	u32 tracer_flags;
5389 	int i;
5390 
5391 	mutex_lock(&trace_types_lock);
5392 	tracer_flags = tr->current_trace->flags->val;
5393 	trace_opts = tr->current_trace->flags->opts;
5394 
5395 	for (i = 0; trace_options[i]; i++) {
5396 		if (tr->trace_flags & (1 << i))
5397 			seq_printf(m, "%s\n", trace_options[i]);
5398 		else
5399 			seq_printf(m, "no%s\n", trace_options[i]);
5400 	}
5401 
5402 	for (i = 0; trace_opts[i].name; i++) {
5403 		if (tracer_flags & trace_opts[i].bit)
5404 			seq_printf(m, "%s\n", trace_opts[i].name);
5405 		else
5406 			seq_printf(m, "no%s\n", trace_opts[i].name);
5407 	}
5408 	mutex_unlock(&trace_types_lock);
5409 
5410 	return 0;
5411 }
5412 
5413 static int __set_tracer_option(struct trace_array *tr,
5414 			       struct tracer_flags *tracer_flags,
5415 			       struct tracer_opt *opts, int neg)
5416 {
5417 	struct tracer *trace = tracer_flags->trace;
5418 	int ret;
5419 
5420 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5421 	if (ret)
5422 		return ret;
5423 
5424 	if (neg)
5425 		tracer_flags->val &= ~opts->bit;
5426 	else
5427 		tracer_flags->val |= opts->bit;
5428 	return 0;
5429 }
5430 
5431 /* Try to assign a tracer specific option */
5432 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5433 {
5434 	struct tracer *trace = tr->current_trace;
5435 	struct tracer_flags *tracer_flags = trace->flags;
5436 	struct tracer_opt *opts = NULL;
5437 	int i;
5438 
5439 	for (i = 0; tracer_flags->opts[i].name; i++) {
5440 		opts = &tracer_flags->opts[i];
5441 
5442 		if (strcmp(cmp, opts->name) == 0)
5443 			return __set_tracer_option(tr, trace->flags, opts, neg);
5444 	}
5445 
5446 	return -EINVAL;
5447 }
5448 
5449 /* Some tracers require overwrite to stay enabled */
5450 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5451 {
5452 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5453 		return -1;
5454 
5455 	return 0;
5456 }
5457 
5458 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5459 {
5460 	int *map;
5461 
5462 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5463 	    (mask == TRACE_ITER_RECORD_CMD))
5464 		lockdep_assert_held(&event_mutex);
5465 
5466 	/* do nothing if flag is already set */
5467 	if (!!(tr->trace_flags & mask) == !!enabled)
5468 		return 0;
5469 
5470 	/* Give the tracer a chance to approve the change */
5471 	if (tr->current_trace->flag_changed)
5472 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5473 			return -EINVAL;
5474 
5475 	if (enabled)
5476 		tr->trace_flags |= mask;
5477 	else
5478 		tr->trace_flags &= ~mask;
5479 
5480 	if (mask == TRACE_ITER_RECORD_CMD)
5481 		trace_event_enable_cmd_record(enabled);
5482 
5483 	if (mask == TRACE_ITER_RECORD_TGID) {
5484 		if (!tgid_map) {
5485 			tgid_map_max = pid_max;
5486 			map = kvcalloc(tgid_map_max + 1, sizeof(*tgid_map),
5487 				       GFP_KERNEL);
5488 
5489 			/*
5490 			 * Pairs with smp_load_acquire() in
5491 			 * trace_find_tgid_ptr() to ensure that if it observes
5492 			 * the tgid_map we just allocated then it also observes
5493 			 * the corresponding tgid_map_max value.
5494 			 */
5495 			smp_store_release(&tgid_map, map);
5496 		}
5497 		if (!tgid_map) {
5498 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5499 			return -ENOMEM;
5500 		}
5501 
5502 		trace_event_enable_tgid_record(enabled);
5503 	}
5504 
5505 	if (mask == TRACE_ITER_EVENT_FORK)
5506 		trace_event_follow_fork(tr, enabled);
5507 
5508 	if (mask == TRACE_ITER_FUNC_FORK)
5509 		ftrace_pid_follow_fork(tr, enabled);
5510 
5511 	if (mask == TRACE_ITER_OVERWRITE) {
5512 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5513 #ifdef CONFIG_TRACER_MAX_TRACE
5514 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5515 #endif
5516 	}
5517 
5518 	if (mask == TRACE_ITER_PRINTK) {
5519 		trace_printk_start_stop_comm(enabled);
5520 		trace_printk_control(enabled);
5521 	}
5522 
5523 	return 0;
5524 }
5525 
5526 int trace_set_options(struct trace_array *tr, char *option)
5527 {
5528 	char *cmp;
5529 	int neg = 0;
5530 	int ret;
5531 	size_t orig_len = strlen(option);
5532 	int len;
5533 
5534 	cmp = strstrip(option);
5535 
5536 	len = str_has_prefix(cmp, "no");
5537 	if (len)
5538 		neg = 1;
5539 
5540 	cmp += len;
5541 
5542 	mutex_lock(&event_mutex);
5543 	mutex_lock(&trace_types_lock);
5544 
5545 	ret = match_string(trace_options, -1, cmp);
5546 	/* If not a general trace option, test the tracer-specific options */
5547 	if (ret < 0)
5548 		ret = set_tracer_option(tr, cmp, neg);
5549 	else
5550 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5551 
5552 	mutex_unlock(&trace_types_lock);
5553 	mutex_unlock(&event_mutex);
5554 
5555 	/*
5556 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5557 	 * turn it back into a space.
5558 	 */
5559 	if (orig_len > strlen(option))
5560 		option[strlen(option)] = ' ';
5561 
5562 	return ret;
5563 }
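
/*
 * Illustrative call (a sketch): trace_set_options() strips the string and
 * may temporarily write into it, so it must be given a writable buffer,
 * not a string literal:
 *
 *	char opt[] = "nooverwrite";
 *
 *	trace_set_options(&global_trace, opt);
 *
 * A leading "no" clears the named flag; without it the flag is set.
 */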
5564 
5565 static void __init apply_trace_boot_options(void)
5566 {
5567 	char *buf = trace_boot_options_buf;
5568 	char *option;
5569 
5570 	while (true) {
5571 		option = strsep(&buf, ",");
5572 
5573 		if (!option)
5574 			break;
5575 
5576 		if (*option)
5577 			trace_set_options(&global_trace, option);
5578 
5579 		/* Put back the comma to allow this to be called again */
5580 		if (buf)
5581 			*(buf - 1) = ',';
5582 	}
5583 }
5584 
5585 static ssize_t
5586 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5587 			size_t cnt, loff_t *ppos)
5588 {
5589 	struct seq_file *m = filp->private_data;
5590 	struct trace_array *tr = m->private;
5591 	char buf[64];
5592 	int ret;
5593 
5594 	if (cnt >= sizeof(buf))
5595 		return -EINVAL;
5596 
5597 	if (copy_from_user(buf, ubuf, cnt))
5598 		return -EFAULT;
5599 
5600 	buf[cnt] = 0;
5601 
5602 	ret = trace_set_options(tr, buf);
5603 	if (ret < 0)
5604 		return ret;
5605 
5606 	*ppos += cnt;
5607 
5608 	return cnt;
5609 }
5610 
5611 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5612 {
5613 	struct trace_array *tr = inode->i_private;
5614 	int ret;
5615 
5616 	ret = tracing_check_open_get_tr(tr);
5617 	if (ret)
5618 		return ret;
5619 
5620 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5621 	if (ret < 0)
5622 		trace_array_put(tr);
5623 
5624 	return ret;
5625 }
5626 
5627 static const struct file_operations tracing_iter_fops = {
5628 	.open		= tracing_trace_options_open,
5629 	.read		= seq_read,
5630 	.llseek		= seq_lseek,
5631 	.release	= tracing_single_release_tr,
5632 	.write		= tracing_trace_options_write,
5633 };
5634 
5635 static const char readme_msg[] =
5636 	"tracing mini-HOWTO:\n\n"
5637 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5638 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5639 	" Important files:\n"
5640 	"  trace\t\t\t- The static contents of the buffer\n"
5641 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5642 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5643 	"  current_tracer\t- function and latency tracers\n"
5644 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5645 	"  error_log\t- error log for failed commands (that support it)\n"
5646 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5647 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5648 	"  trace_clock\t\t- change the clock used to order events\n"
5649 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5650 	"      global:   Synced across CPUs but slows tracing down.\n"
5651 	"     counter:   Not a clock, but just an increment\n"
5652 	"      uptime:   Jiffy counter from time of boot\n"
5653 	"        perf:   Same clock that perf events use\n"
5654 #ifdef CONFIG_X86_64
5655 	"     x86-tsc:   TSC cycle counter\n"
5656 #endif
5657 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5658 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5659 	"    absolute:   Absolute (standalone) timestamp\n"
5660 	"\n  trace_marker\t\t- Writes into this file are written into the kernel buffer\n"
5661 	"\n  trace_marker_raw\t\t- Writes into this file are written as binary data into the kernel buffer\n"
5662 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5663 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5664 	"\t\t\t  Remove sub-buffer with rmdir\n"
5665 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5666 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5667 	"\t\t\t  option name\n"
5668 	"  saved_cmdlines_size\t- echo a number in here to resize the saved comm-pid list\n"
5669 #ifdef CONFIG_DYNAMIC_FTRACE
5670 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5671 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5672 	"\t\t\t  functions\n"
5673 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5674 	"\t     modules: Can select a group via module\n"
5675 	"\t      Format: :mod:<module-name>\n"
5676 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5677 	"\t    triggers: a command to perform when function is hit\n"
5678 	"\t      Format: <function>:<trigger>[:count]\n"
5679 	"\t     trigger: traceon, traceoff\n"
5680 	"\t\t      enable_event:<system>:<event>\n"
5681 	"\t\t      disable_event:<system>:<event>\n"
5682 #ifdef CONFIG_STACKTRACE
5683 	"\t\t      stacktrace\n"
5684 #endif
5685 #ifdef CONFIG_TRACER_SNAPSHOT
5686 	"\t\t      snapshot\n"
5687 #endif
5688 	"\t\t      dump\n"
5689 	"\t\t      cpudump\n"
5690 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5691 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5692 	"\t     The first one will disable tracing every time do_fault is hit\n"
5693 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5694 	"\t       The first time do_trap is hit and it disables tracing, the\n"
5695 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5696 	"\t       the counter will not decrement. It only decrements when the\n"
5697 	"\t       trigger did work\n"
5698 	"\t     To remove a trigger without a count:\n"
5699 	"\t       echo '!<function>:<trigger>' > set_ftrace_filter\n"
5700 	"\t     To remove a trigger with a count:\n"
5701 	"\t       echo '!<function>:<trigger>:0' > set_ftrace_filter\n"
5702 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5703 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5704 	"\t    modules: Can select a group via module command :mod:\n"
5705 	"\t    Does not accept triggers\n"
5706 #endif /* CONFIG_DYNAMIC_FTRACE */
5707 #ifdef CONFIG_FUNCTION_TRACER
5708 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5709 	"\t\t    (function)\n"
5710 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5711 	"\t\t    (function)\n"
5712 #endif
5713 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5714 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5715 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5716 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5717 #endif
5718 #ifdef CONFIG_TRACER_SNAPSHOT
5719 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5720 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5721 	"\t\t\t  information\n"
5722 #endif
5723 #ifdef CONFIG_STACK_TRACER
5724 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5725 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5726 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5727 	"\t\t\t  new trace)\n"
5728 #ifdef CONFIG_DYNAMIC_FTRACE
5729 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5730 	"\t\t\t  traces\n"
5731 #endif
5732 #endif /* CONFIG_STACK_TRACER */
5733 #ifdef CONFIG_DYNAMIC_EVENTS
5734 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5735 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5736 #endif
5737 #ifdef CONFIG_KPROBE_EVENTS
5738 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5739 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5740 #endif
5741 #ifdef CONFIG_UPROBE_EVENTS
5742 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5743 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5744 #endif
5745 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5746     defined(CONFIG_FPROBE_EVENTS)
5747 	"\t  accepts: event-definitions (one definition per line)\n"
5748 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5749 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5750 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5751 #endif
5752 #ifdef CONFIG_FPROBE_EVENTS
5753 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5754 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5755 #endif
5756 #ifdef CONFIG_HIST_TRIGGERS
5757 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5758 #endif
5759 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5760 	"\t           -:[<group>/][<event>]\n"
5761 #ifdef CONFIG_KPROBE_EVENTS
5762 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5763   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5764 #endif
5765 #ifdef CONFIG_UPROBE_EVENTS
5766   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5767 #endif
5768 	"\t     args: <name>=fetcharg[:type]\n"
5769 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5770 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5771 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5772 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5773 	"\t           <argname>[->field[->field|.field...]],\n"
5774 #endif
5775 #else
5776 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5777 #endif
5778 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5779 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5780 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5781 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5782 	"\t           symstr, <type>\\[<array-size>\\]\n"
5783 #ifdef CONFIG_HIST_TRIGGERS
5784 	"\t    field: <stype> <name>;\n"
5785 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5786 	"\t           [unsigned] char/int/long\n"
5787 #endif
5788 	"\t    efield: For event probes ('e' types), the field is one of the fields\n"
5789 	"\t            of the <attached-group>/<attached-event>.\n"
5790 #endif
5791 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5792 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5793 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5794 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5795 	"\t\t\t  events\n"
5796 	"      filter\t\t- If set, only events passing filter are traced\n"
5797 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5798 	"\t\t\t  <event>:\n"
5799 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5800 	"      filter\t\t- If set, only events passing filter are traced\n"
5801 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5802 	"\t    Format: <trigger>[:count][if <filter>]\n"
5803 	"\t   trigger: traceon, traceoff\n"
5804 	"\t            enable_event:<system>:<event>\n"
5805 	"\t            disable_event:<system>:<event>\n"
5806 #ifdef CONFIG_HIST_TRIGGERS
5807 	"\t            enable_hist:<system>:<event>\n"
5808 	"\t            disable_hist:<system>:<event>\n"
5809 #endif
5810 #ifdef CONFIG_STACKTRACE
5811 	"\t\t    stacktrace\n"
5812 #endif
5813 #ifdef CONFIG_TRACER_SNAPSHOT
5814 	"\t\t    snapshot\n"
5815 #endif
5816 #ifdef CONFIG_HIST_TRIGGERS
5817 	"\t\t    hist (see below)\n"
5818 #endif
5819 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5820 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5821 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5822 	"\t                  events/block/block_unplug/trigger\n"
5823 	"\t   The first disables tracing every time block_unplug is hit.\n"
5824 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5825 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5826 	"\t     is hit and has a value greater than 1 for the 'nr_rq' event field.\n"
5827 	"\t   Like function triggers, the counter is only decremented if it\n"
5828 	"\t    enabled or disabled tracing.\n"
5829 	"\t   To remove a trigger without a count:\n"
5830 	"\t     echo '!<trigger>' > <system>/<event>/trigger\n"
5831 	"\t   To remove a trigger with a count:\n"
5832 	"\t     echo '!<trigger>:0' > <system>/<event>/trigger\n"
5833 	"\t   Filters can be ignored when removing a trigger.\n"
5834 #ifdef CONFIG_HIST_TRIGGERS
5835 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5836 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5837 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5838 	"\t            [:values=<field1[,field2,...]>]\n"
5839 	"\t            [:sort=<field1[,field2,...]>]\n"
5840 	"\t            [:size=#entries]\n"
5841 	"\t            [:pause][:continue][:clear]\n"
5842 	"\t            [:name=histname1]\n"
5843 	"\t            [:nohitcount]\n"
5844 	"\t            [:<handler>.<action>]\n"
5845 	"\t            [if <filter>]\n\n"
5846 	"\t    Note, special fields can be used as well:\n"
5847 	"\t            common_timestamp - to record current timestamp\n"
5848 	"\t            common_cpu - to record the CPU the event happened on\n"
5849 	"\n"
5850 	"\t    A hist trigger variable can be:\n"
5851 	"\t        - a reference to a field e.g. x=common_timestamp,\n"
5852 	"\t        - a reference to another variable e.g. y=$x,\n"
5853 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5854 	"\t        - an arithmetic expression: e.g. time_secs=common_timestamp/1000\n"
5855 	"\n"
5856 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5857 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5858 	"\t    variable reference, field or numeric literal.\n"
5859 	"\n"
5860 	"\t    When a matching event is hit, an entry is added to a hash\n"
5861 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5862 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5863 	"\t    correspond to fields in the event's format description.  Keys\n"
5864 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5865 	"\t    Compound keys consisting of up to two fields can be specified\n"
5866 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5867 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5868 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5869 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5870 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5871 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5872 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5873 	"\t    its histogram data will be shared with other triggers of the\n"
5874 	"\t    same name, and trigger hits will update this common data.\n\n"
5875 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5876 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5877 	"\t    triggers attached to an event, there will be a table for each\n"
5878 	"\t    trigger in the output.  The table displayed for a named\n"
5879 	"\t    trigger will be the same as any other instance having the\n"
5880 	"\t    same name.  The default format used to display a given field\n"
5881 	"\t    can be modified by appending any of the following modifiers\n"
5882 	"\t    to the field name, as applicable:\n\n"
5883 	"\t            .hex        display a number as a hex value\n"
5884 	"\t            .sym        display an address as a symbol\n"
5885 	"\t            .sym-offset display an address as a symbol and offset\n"
5886 	"\t            .execname   display a common_pid as a program name\n"
5887 	"\t            .syscall    display a syscall id as a syscall name\n"
5888 	"\t            .log2       display log2 value rather than raw number\n"
5889 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5890 	"\t            .usecs      display a common_timestamp in microseconds\n"
5891 	"\t            .percent    display a number as a percentage value\n"
5892 	"\t            .graph      display a bar-graph of a value\n\n"
5893 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5894 	"\t    trigger or to start a hist trigger but not log any events\n"
5895 	"\t    until told to do so.  'continue' can be used to start or\n"
5896 	"\t    restart a paused hist trigger.\n\n"
5897 	"\t    The 'clear' parameter will clear the contents of a running\n"
5898 	"\t    hist trigger and leave its current paused/active state\n"
5899 	"\t    unchanged.\n\n"
5900 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5901 	"\t    raw hitcount in the histogram.\n\n"
5902 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5903 	"\t    have one event conditionally start and stop another event's\n"
5904 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5905 	"\t    the enable_event and disable_event triggers.\n\n"
5906 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5907 	"\t    histogram entry is added or updated.  They take the form:\n\n"
5908 	"\t        <handler>.<action>\n\n"
5909 	"\t    The available handlers are:\n\n"
5910 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5911 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5912 	"\t        onchange(var)            - invoke action if var changes\n\n"
5913 	"\t    The available actions are:\n\n"
5914 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5915 	"\t        save(field,...)                      - save current event fields\n"
5916 #ifdef CONFIG_TRACER_SNAPSHOT
5917 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5918 #endif
5919 #ifdef CONFIG_SYNTH_EVENTS
5920 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5921 	"\t  Write into this file to define/undefine new synthetic events.\n"
5922 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5923 #endif
5924 #endif
5925 ;
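/*
 * Illustrative userspace sketch (not part of this file): exercising one of
 * the interfaces described in the mini-HOWTO above -- attach a hist trigger
 * to an event and dump the resulting histogram. Assumes tracefs at
 * /sys/kernel/tracing, CONFIG_HIST_TRIGGERS, and the sched_switch event;
 * the key choice is only an example.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

#define EVT "/sys/kernel/tracing/events/sched/sched_switch/"

int main(void)
{
	const char *cmd = "hist:keys=common_pid";
	char buf[4096];
	ssize_t n;
	int fd;

	fd = open(EVT "trigger", O_WRONLY);
	if (fd < 0 || write(fd, cmd, strlen(cmd)) < 0)
		perror("attach hist trigger");
	if (fd >= 0)
		close(fd);

	sleep(1);	/* let a few events accumulate */

	fd = open(EVT "hist", O_RDONLY);
	if (fd < 0)
		return 1;
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	/* to detach: write "!hist:keys=common_pid" to the trigger file */
	return 0;
}
#endif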
5926 
5927 static ssize_t
5928 tracing_readme_read(struct file *filp, char __user *ubuf,
5929 		       size_t cnt, loff_t *ppos)
5930 {
5931 	return simple_read_from_buffer(ubuf, cnt, ppos,
5932 					readme_msg, strlen(readme_msg));
5933 }
5934 
5935 static const struct file_operations tracing_readme_fops = {
5936 	.open		= tracing_open_generic,
5937 	.read		= tracing_readme_read,
5938 	.llseek		= generic_file_llseek,
5939 };
5940 
5941 static void *saved_tgids_next(struct seq_file *m, void *v, loff_t *pos)
5942 {
5943 	int pid = ++(*pos);
5944 
5945 	return trace_find_tgid_ptr(pid);
5946 }
5947 
5948 static void *saved_tgids_start(struct seq_file *m, loff_t *pos)
5949 {
5950 	int pid = *pos;
5951 
5952 	return trace_find_tgid_ptr(pid);
5953 }
5954 
5955 static void saved_tgids_stop(struct seq_file *m, void *v)
5956 {
5957 }
5958 
5959 static int saved_tgids_show(struct seq_file *m, void *v)
5960 {
5961 	int *entry = (int *)v;
5962 	int pid = entry - tgid_map;
5963 	int tgid = *entry;
5964 
5965 	if (tgid == 0)
5966 		return SEQ_SKIP;
5967 
5968 	seq_printf(m, "%d %d\n", pid, tgid);
5969 	return 0;
5970 }
5971 
5972 static const struct seq_operations tracing_saved_tgids_seq_ops = {
5973 	.start		= saved_tgids_start,
5974 	.stop		= saved_tgids_stop,
5975 	.next		= saved_tgids_next,
5976 	.show		= saved_tgids_show,
5977 };
5978 
5979 static int tracing_saved_tgids_open(struct inode *inode, struct file *filp)
5980 {
5981 	int ret;
5982 
5983 	ret = tracing_check_open_get_tr(NULL);
5984 	if (ret)
5985 		return ret;
5986 
5987 	return seq_open(filp, &tracing_saved_tgids_seq_ops);
5988 }
5989 
5990 
5991 static const struct file_operations tracing_saved_tgids_fops = {
5992 	.open		= tracing_saved_tgids_open,
5993 	.read		= seq_read,
5994 	.llseek		= seq_lseek,
5995 	.release	= seq_release,
5996 };
5997 
5998 static void *saved_cmdlines_next(struct seq_file *m, void *v, loff_t *pos)
5999 {
6000 	unsigned int *ptr = v;
6001 
6002 	if (*pos || m->count)
6003 		ptr++;
6004 
6005 	(*pos)++;
6006 
6007 	for (; ptr < &savedcmd->map_cmdline_to_pid[savedcmd->cmdline_num];
6008 	     ptr++) {
6009 		if (*ptr == -1 || *ptr == NO_CMDLINE_MAP)
6010 			continue;
6011 
6012 		return ptr;
6013 	}
6014 
6015 	return NULL;
6016 }
6017 
6018 static void *saved_cmdlines_start(struct seq_file *m, loff_t *pos)
6019 {
6020 	void *v;
6021 	loff_t l = 0;
6022 
6023 	preempt_disable();
6024 	arch_spin_lock(&trace_cmdline_lock);
6025 
6026 	v = &savedcmd->map_cmdline_to_pid[0];
6027 	while (l <= *pos) {
6028 		v = saved_cmdlines_next(m, v, &l);
6029 		if (!v)
6030 			return NULL;
6031 	}
6032 
6033 	return v;
6034 }
6035 
6036 static void saved_cmdlines_stop(struct seq_file *m, void *v)
6037 {
6038 	arch_spin_unlock(&trace_cmdline_lock);
6039 	preempt_enable();
6040 }
6041 
6042 static int saved_cmdlines_show(struct seq_file *m, void *v)
6043 {
6044 	char buf[TASK_COMM_LEN];
6045 	unsigned int *pid = v;
6046 
6047 	__trace_find_cmdline(*pid, buf);
6048 	seq_printf(m, "%d %s\n", *pid, buf);
6049 	return 0;
6050 }
6051 
6052 static const struct seq_operations tracing_saved_cmdlines_seq_ops = {
6053 	.start		= saved_cmdlines_start,
6054 	.next		= saved_cmdlines_next,
6055 	.stop		= saved_cmdlines_stop,
6056 	.show		= saved_cmdlines_show,
6057 };
6058 
6059 static int tracing_saved_cmdlines_open(struct inode *inode, struct file *filp)
6060 {
6061 	int ret;
6062 
6063 	ret = tracing_check_open_get_tr(NULL);
6064 	if (ret)
6065 		return ret;
6066 
6067 	return seq_open(filp, &tracing_saved_cmdlines_seq_ops);
6068 }
6069 
6070 static const struct file_operations tracing_saved_cmdlines_fops = {
6071 	.open		= tracing_saved_cmdlines_open,
6072 	.read		= seq_read,
6073 	.llseek		= seq_lseek,
6074 	.release	= seq_release,
6075 };
6076 
6077 static ssize_t
6078 tracing_saved_cmdlines_size_read(struct file *filp, char __user *ubuf,
6079 				 size_t cnt, loff_t *ppos)
6080 {
6081 	char buf[64];
6082 	int r;
6083 
6084 	preempt_disable();
6085 	arch_spin_lock(&trace_cmdline_lock);
6086 	r = scnprintf(buf, sizeof(buf), "%u\n", savedcmd->cmdline_num);
6087 	arch_spin_unlock(&trace_cmdline_lock);
6088 	preempt_enable();
6089 
6090 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6091 }
6092 
6093 static int tracing_resize_saved_cmdlines(unsigned int val)
6094 {
6095 	struct saved_cmdlines_buffer *s, *savedcmd_temp;
6096 
6097 	s = allocate_cmdlines_buffer(val);
6098 	if (!s)
6099 		return -ENOMEM;
6100 
6101 	preempt_disable();
6102 	arch_spin_lock(&trace_cmdline_lock);
6103 	savedcmd_temp = savedcmd;
6104 	savedcmd = s;
6105 	arch_spin_unlock(&trace_cmdline_lock);
6106 	preempt_enable();
6107 	free_saved_cmdlines_buffer(savedcmd_temp);
6108 
6109 	return 0;
6110 }
6111 
6112 static ssize_t
6113 tracing_saved_cmdlines_size_write(struct file *filp, const char __user *ubuf,
6114 				  size_t cnt, loff_t *ppos)
6115 {
6116 	unsigned long val;
6117 	int ret;
6118 
6119 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6120 	if (ret)
6121 		return ret;
6122 
6123 	/* must have at least 1 entry and at most PID_MAX_DEFAULT entries */
6124 	if (!val || val > PID_MAX_DEFAULT)
6125 		return -EINVAL;
6126 
6127 	ret = tracing_resize_saved_cmdlines((unsigned int)val);
6128 	if (ret < 0)
6129 		return ret;
6130 
6131 	*ppos += cnt;
6132 
6133 	return cnt;
6134 }
6135 
6136 static const struct file_operations tracing_saved_cmdlines_size_fops = {
6137 	.open		= tracing_open_generic,
6138 	.read		= tracing_saved_cmdlines_size_read,
6139 	.write		= tracing_saved_cmdlines_size_write,
6140 };
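/*
 * Illustrative userspace sketch (not part of this file): growing the saved
 * cmdlines cache via the file above and then listing the cached pid/comm
 * pairs. Assumes tracefs at /sys/kernel/tracing; 2048 is an arbitrary
 * example within the PID_MAX_DEFAULT bound checked in the write handler.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	ssize_t n;
	int fd;

	fd = open("/sys/kernel/tracing/saved_cmdlines_size", O_WRONLY);
	if (fd < 0 || write(fd, "2048", 4) < 0)
		perror("resize saved_cmdlines");
	if (fd >= 0)
		close(fd);

	fd = open("/sys/kernel/tracing/saved_cmdlines", O_RDONLY);
	if (fd < 0)
		return 1;
	while ((n = read(fd, buf, sizeof(buf))) > 0)
		fwrite(buf, 1, n, stdout);
	close(fd);
	return 0;
}
#endif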
6141 
6142 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
6143 static union trace_eval_map_item *
6144 update_eval_map(union trace_eval_map_item *ptr)
6145 {
6146 	if (!ptr->map.eval_string) {
6147 		if (ptr->tail.next) {
6148 			ptr = ptr->tail.next;
6149 			/* Set ptr to the next real item (skip head) */
6150 			ptr++;
6151 		} else
6152 			return NULL;
6153 	}
6154 	return ptr;
6155 }
6156 
6157 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
6158 {
6159 	union trace_eval_map_item *ptr = v;
6160 
6161 	/*
6162 	 * Paranoid! If ptr points to end, we don't want to increment past it.
6163 	 * This really should never happen.
6164 	 */
6165 	(*pos)++;
6166 	ptr = update_eval_map(ptr);
6167 	if (WARN_ON_ONCE(!ptr))
6168 		return NULL;
6169 
6170 	ptr++;
6171 	ptr = update_eval_map(ptr);
6172 
6173 	return ptr;
6174 }
6175 
6176 static void *eval_map_start(struct seq_file *m, loff_t *pos)
6177 {
6178 	union trace_eval_map_item *v;
6179 	loff_t l = 0;
6180 
6181 	mutex_lock(&trace_eval_mutex);
6182 
6183 	v = trace_eval_maps;
6184 	if (v)
6185 		v++;
6186 
6187 	while (v && l < *pos) {
6188 		v = eval_map_next(m, v, &l);
6189 	}
6190 
6191 	return v;
6192 }
6193 
6194 static void eval_map_stop(struct seq_file *m, void *v)
6195 {
6196 	mutex_unlock(&trace_eval_mutex);
6197 }
6198 
6199 static int eval_map_show(struct seq_file *m, void *v)
6200 {
6201 	union trace_eval_map_item *ptr = v;
6202 
6203 	seq_printf(m, "%s %ld (%s)\n",
6204 		   ptr->map.eval_string, ptr->map.eval_value,
6205 		   ptr->map.system);
6206 
6207 	return 0;
6208 }
6209 
6210 static const struct seq_operations tracing_eval_map_seq_ops = {
6211 	.start		= eval_map_start,
6212 	.next		= eval_map_next,
6213 	.stop		= eval_map_stop,
6214 	.show		= eval_map_show,
6215 };
6216 
6217 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
6218 {
6219 	int ret;
6220 
6221 	ret = tracing_check_open_get_tr(NULL);
6222 	if (ret)
6223 		return ret;
6224 
6225 	return seq_open(filp, &tracing_eval_map_seq_ops);
6226 }
6227 
6228 static const struct file_operations tracing_eval_map_fops = {
6229 	.open		= tracing_eval_map_open,
6230 	.read		= seq_read,
6231 	.llseek		= seq_lseek,
6232 	.release	= seq_release,
6233 };
6234 
6235 static inline union trace_eval_map_item *
6236 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
6237 {
6238 	/* Return tail of array given the head */
6239 	return ptr + ptr->head.length + 1;
6240 }
6241 
6242 static void
6243 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
6244 			   int len)
6245 {
6246 	struct trace_eval_map **stop;
6247 	struct trace_eval_map **map;
6248 	union trace_eval_map_item *map_array;
6249 	union trace_eval_map_item *ptr;
6250 
6251 	stop = start + len;
6252 
6253 	/*
6254 	 * The trace_eval_maps contains the map plus a head and tail item,
6255 	 * where the head holds the module and length of array, and the
6256 	 * tail holds a pointer to the next list.
6257 	 */
6258 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
6259 	if (!map_array) {
6260 		pr_warn("Unable to allocate trace eval mapping\n");
6261 		return;
6262 	}
6263 
6264 	mutex_lock(&trace_eval_mutex);
6265 
6266 	if (!trace_eval_maps)
6267 		trace_eval_maps = map_array;
6268 	else {
6269 		ptr = trace_eval_maps;
6270 		for (;;) {
6271 			ptr = trace_eval_jmp_to_tail(ptr);
6272 			if (!ptr->tail.next)
6273 				break;
6274 			ptr = ptr->tail.next;
6275 
6276 		}
6277 		ptr->tail.next = map_array;
6278 	}
6279 	map_array->head.mod = mod;
6280 	map_array->head.length = len;
6281 	map_array++;
6282 
6283 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
6284 		map_array->map = **map;
6285 		map_array++;
6286 	}
6287 	memset(map_array, 0, sizeof(*map_array));
6288 
6289 	mutex_unlock(&trace_eval_mutex);
6290 }
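/*
 * Illustrative, hedged sketch (not part of this file): a standalone model
 * of the storage layout built above -- an array of union items arranged as
 * [head][map 0 .. map N-1][tail], where one array's tail points at the
 * next array's head. Names, types and values below are simplified
 * placeholders, not the kernel's trace_eval_map_item.
 */
#if 0
#include <stdio.h>
#include <stdlib.h>

struct demo_map { const char *name; long value; };

union demo_item {
	struct demo_map map;
	struct { int length; } head;			/* first slot */
	struct { union demo_item *next; } tail;		/* last slot */
};

static union demo_item *make_array(const struct demo_map *maps, int len)
{
	union demo_item *arr = calloc(len + 2, sizeof(*arr));
	int i;

	if (!arr)
		return NULL;
	arr[0].head.length = len;
	for (i = 0; i < len; i++)
		arr[i + 1].map = maps[i];
	/* arr[len + 1] is the tail; ->next stays NULL until chained */
	return arr;
}

int main(void)
{
	const struct demo_map maps[] = {
		{ "EXAMPLE_A", 1 },	/* placeholder eval strings/values */
		{ "EXAMPLE_B", 2 },
	};
	union demo_item *arr = make_array(maps, 2);
	union demo_item *tail;
	int i;

	if (!arr)
		return 1;
	/* same arithmetic as trace_eval_jmp_to_tail(): skip head + entries */
	tail = arr + arr[0].head.length + 1;
	for (i = 1; i <= arr[0].head.length; i++)
		printf("%s = %ld\n", arr[i].map.name, arr[i].map.value);
	printf("tail->next = %p\n", (void *)tail->tail.next);
	free(arr);
	return 0;
}
#endif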
6291 
6292 static void trace_create_eval_file(struct dentry *d_tracer)
6293 {
6294 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
6295 			  NULL, &tracing_eval_map_fops);
6296 }
6297 
6298 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
6299 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
6300 static inline void trace_insert_eval_map_file(struct module *mod,
6301 			      struct trace_eval_map **start, int len) { }
6302 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
6303 
6304 static void trace_insert_eval_map(struct module *mod,
6305 				  struct trace_eval_map **start, int len)
6306 {
6307 	struct trace_eval_map **map;
6308 
6309 	if (len <= 0)
6310 		return;
6311 
6312 	map = start;
6313 
6314 	trace_event_eval_update(map, len);
6315 
6316 	trace_insert_eval_map_file(mod, start, len);
6317 }
6318 
6319 static ssize_t
6320 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6321 		       size_t cnt, loff_t *ppos)
6322 {
6323 	struct trace_array *tr = filp->private_data;
6324 	char buf[MAX_TRACER_SIZE+2];
6325 	int r;
6326 
6327 	mutex_lock(&trace_types_lock);
6328 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6329 	mutex_unlock(&trace_types_lock);
6330 
6331 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6332 }
6333 
6334 int tracer_init(struct tracer *t, struct trace_array *tr)
6335 {
6336 	tracing_reset_online_cpus(&tr->array_buffer);
6337 	return t->init(tr);
6338 }
6339 
6340 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6341 {
6342 	int cpu;
6343 
6344 	for_each_tracing_cpu(cpu)
6345 		per_cpu_ptr(buf->data, cpu)->entries = val;
6346 }
6347 
6348 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6349 {
6350 	if (cpu == RING_BUFFER_ALL_CPUS) {
6351 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6352 	} else {
6353 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6354 	}
6355 }
6356 
6357 #ifdef CONFIG_TRACER_MAX_TRACE
6358 /* resize @trace_buf's per-CPU buffers to match @size_buf's entries */
6359 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6360 					struct array_buffer *size_buf, int cpu_id)
6361 {
6362 	int cpu, ret = 0;
6363 
6364 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6365 		for_each_tracing_cpu(cpu) {
6366 			ret = ring_buffer_resize(trace_buf->buffer,
6367 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6368 			if (ret < 0)
6369 				break;
6370 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6371 				per_cpu_ptr(size_buf->data, cpu)->entries;
6372 		}
6373 	} else {
6374 		ret = ring_buffer_resize(trace_buf->buffer,
6375 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6376 		if (ret == 0)
6377 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6378 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6379 	}
6380 
6381 	return ret;
6382 }
6383 #endif /* CONFIG_TRACER_MAX_TRACE */
6384 
6385 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6386 					unsigned long size, int cpu)
6387 {
6388 	int ret;
6389 
6390 	/*
6391 	 * If kernel or user changes the size of the ring buffer
6392 	 * we use the size that was given, and we can forget about
6393 	 * expanding it later.
6394 	 */
6395 	trace_set_ring_buffer_expanded(tr);
6396 
6397 	/* May be called before buffers are initialized */
6398 	if (!tr->array_buffer.buffer)
6399 		return 0;
6400 
6401 	/* Do not allow tracing while resizing ring buffer */
6402 	tracing_stop_tr(tr);
6403 
6404 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6405 	if (ret < 0)
6406 		goto out_start;
6407 
6408 #ifdef CONFIG_TRACER_MAX_TRACE
6409 	if (!tr->allocated_snapshot)
6410 		goto out;
6411 
6412 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6413 	if (ret < 0) {
6414 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6415 						     &tr->array_buffer, cpu);
6416 		if (r < 0) {
6417 			/*
6418 			 * AARGH! We are left with different
6419 			 * size max buffer!!!!
6420 			 * The max buffer is our "snapshot" buffer.
6421 			 * When a tracer needs a snapshot (one of the
6422 			 * latency tracers), it swaps the max buffer
6423 			 * with the saved snapshot. We succeeded in updating
6424 			 * the size of the main buffer, but failed to
6425 			 * update the size of the max buffer. But when we tried
6426 			 * to reset the main buffer to the original size, we
6427 			 * failed there too. This is very unlikely to
6428 			 * happen, but if it does, warn and kill all
6429 			 * tracing.
6430 			 */
6431 			WARN_ON(1);
6432 			tracing_disabled = 1;
6433 		}
6434 		goto out_start;
6435 	}
6436 
6437 	update_buffer_entries(&tr->max_buffer, cpu);
6438 
6439  out:
6440 #endif /* CONFIG_TRACER_MAX_TRACE */
6441 
6442 	update_buffer_entries(&tr->array_buffer, cpu);
6443  out_start:
6444 	tracing_start_tr(tr);
6445 	return ret;
6446 }
6447 
6448 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6449 				  unsigned long size, int cpu_id)
6450 {
6451 	int ret;
6452 
6453 	mutex_lock(&trace_types_lock);
6454 
6455 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6456 		/* make sure this cpu is enabled in the mask */
6457 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6458 			ret = -EINVAL;
6459 			goto out;
6460 		}
6461 	}
6462 
6463 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6464 	if (ret < 0)
6465 		ret = -ENOMEM;
6466 
6467 out:
6468 	mutex_unlock(&trace_types_lock);
6469 
6470 	return ret;
6471 }
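/*
 * Illustrative userspace sketch (not part of this file): triggering the
 * resize path above by writing a new per-CPU size, in KB, to
 * buffer_size_kb. Assumes tracefs at /sys/kernel/tracing; 4096 KB is an
 * arbitrary example. Writing to per_cpu/cpuN/buffer_size_kb resizes a
 * single CPU instead.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	const char *kb = "4096";
	int fd = open("/sys/kernel/tracing/buffer_size_kb", O_WRONLY);

	if (fd < 0) {
		perror("open buffer_size_kb");
		return 1;
	}
	if (write(fd, kb, strlen(kb)) < 0)
		perror("write buffer_size_kb");
	close(fd);
	return 0;
}
#endif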
6472 
6473 
6474 /**
6475  * tracing_update_buffers - used by tracing facility to expand ring buffers
6476  * @tr: The tracing instance
6477  *
6478  * To save memory when tracing is never used on a system with it
6479  * configured in, the ring buffers are set to a minimum size. But once
6480  * a user starts to use the tracing facility, they need to grow
6481  * to their default size.
6482  *
6483  * This function is to be called when a tracer is about to be used.
6484  */
6485 int tracing_update_buffers(struct trace_array *tr)
6486 {
6487 	int ret = 0;
6488 
6489 	mutex_lock(&trace_types_lock);
6490 	if (!tr->ring_buffer_expanded)
6491 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6492 						RING_BUFFER_ALL_CPUS);
6493 	mutex_unlock(&trace_types_lock);
6494 
6495 	return ret;
6496 }
6497 
6498 struct trace_option_dentry;
6499 
6500 static void
6501 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6502 
6503 /*
6504  * Used to clear out the tracer before deletion of an instance.
6505  * Must have trace_types_lock held.
6506  */
6507 static void tracing_set_nop(struct trace_array *tr)
6508 {
6509 	if (tr->current_trace == &nop_trace)
6510 		return;
6511 
6512 	tr->current_trace->enabled--;
6513 
6514 	if (tr->current_trace->reset)
6515 		tr->current_trace->reset(tr);
6516 
6517 	tr->current_trace = &nop_trace;
6518 }
6519 
6520 static bool tracer_options_updated;
6521 
6522 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6523 {
6524 	/* Only enable if the directory has been created already. */
6525 	if (!tr->dir)
6526 		return;
6527 
6528 	/* Only create trace option files after update_tracer_options finish */
6529 	if (!tracer_options_updated)
6530 		return;
6531 
6532 	create_trace_option_files(tr, t);
6533 }
6534 
6535 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6536 {
6537 	struct tracer *t;
6538 #ifdef CONFIG_TRACER_MAX_TRACE
6539 	bool had_max_tr;
6540 #endif
6541 	int ret = 0;
6542 
6543 	mutex_lock(&trace_types_lock);
6544 
6545 	if (!tr->ring_buffer_expanded) {
6546 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6547 						RING_BUFFER_ALL_CPUS);
6548 		if (ret < 0)
6549 			goto out;
6550 		ret = 0;
6551 	}
6552 
6553 	for (t = trace_types; t; t = t->next) {
6554 		if (strcmp(t->name, buf) == 0)
6555 			break;
6556 	}
6557 	if (!t) {
6558 		ret = -EINVAL;
6559 		goto out;
6560 	}
6561 	if (t == tr->current_trace)
6562 		goto out;
6563 
6564 #ifdef CONFIG_TRACER_SNAPSHOT
6565 	if (t->use_max_tr) {
6566 		local_irq_disable();
6567 		arch_spin_lock(&tr->max_lock);
6568 		if (tr->cond_snapshot)
6569 			ret = -EBUSY;
6570 		arch_spin_unlock(&tr->max_lock);
6571 		local_irq_enable();
6572 		if (ret)
6573 			goto out;
6574 	}
6575 #endif
6576 	/* Some tracers won't work on kernel command line */
6577 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6578 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6579 			t->name);
6580 		goto out;
6581 	}
6582 
6583 	/* Some tracers are only allowed for the top level buffer */
6584 	if (!trace_ok_for_array(t, tr)) {
6585 		ret = -EINVAL;
6586 		goto out;
6587 	}
6588 
6589 	/* If trace pipe files are being read, we can't change the tracer */
6590 	if (tr->trace_ref) {
6591 		ret = -EBUSY;
6592 		goto out;
6593 	}
6594 
6595 	trace_branch_disable();
6596 
6597 	tr->current_trace->enabled--;
6598 
6599 	if (tr->current_trace->reset)
6600 		tr->current_trace->reset(tr);
6601 
6602 #ifdef CONFIG_TRACER_MAX_TRACE
6603 	had_max_tr = tr->current_trace->use_max_tr;
6604 
6605 	/* Current trace needs to be nop_trace before synchronize_rcu */
6606 	tr->current_trace = &nop_trace;
6607 
6608 	if (had_max_tr && !t->use_max_tr) {
6609 		/*
6610 		 * We need to make sure that the update_max_tr sees that
6611 		 * current_trace changed to nop_trace to keep it from
6612 		 * swapping the buffers after we resize it.
6613 		 * update_max_tr() is called with interrupts disabled,
6614 		 * so a synchronize_rcu() is sufficient.
6615 		 */
6616 		synchronize_rcu();
6617 		free_snapshot(tr);
6618 	}
6619 
6620 	if (t->use_max_tr && !tr->allocated_snapshot) {
6621 		ret = tracing_alloc_snapshot_instance(tr);
6622 		if (ret < 0)
6623 			goto out;
6624 	}
6625 #else
6626 	tr->current_trace = &nop_trace;
6627 #endif
6628 
6629 	if (t->init) {
6630 		ret = tracer_init(t, tr);
6631 		if (ret)
6632 			goto out;
6633 	}
6634 
6635 	tr->current_trace = t;
6636 	tr->current_trace->enabled++;
6637 	trace_branch_enable(tr);
6638  out:
6639 	mutex_unlock(&trace_types_lock);
6640 
6641 	return ret;
6642 }
6643 
6644 static ssize_t
6645 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6646 			size_t cnt, loff_t *ppos)
6647 {
6648 	struct trace_array *tr = filp->private_data;
6649 	char buf[MAX_TRACER_SIZE+1];
6650 	char *name;
6651 	size_t ret;
6652 	int err;
6653 
6654 	ret = cnt;
6655 
6656 	if (cnt > MAX_TRACER_SIZE)
6657 		cnt = MAX_TRACER_SIZE;
6658 
6659 	if (copy_from_user(buf, ubuf, cnt))
6660 		return -EFAULT;
6661 
6662 	buf[cnt] = 0;
6663 
6664 	name = strim(buf);
6665 
6666 	err = tracing_set_tracer(tr, name);
6667 	if (err)
6668 		return err;
6669 
6670 	*ppos += ret;
6671 
6672 	return ret;
6673 }
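/*
 * Illustrative userspace sketch (not part of this file): selecting a
 * tracer through the current_tracer interface whose write handler appears
 * above. The name must be listed in available_tracers; "nop" is used here
 * because it is always present. Assumes tracefs at /sys/kernel/tracing.
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char avail[1024];
	ssize_t n;
	int fd;

	fd = open("/sys/kernel/tracing/available_tracers", O_RDONLY);
	if (fd >= 0) {
		n = read(fd, avail, sizeof(avail) - 1);
		if (n > 0) {
			avail[n] = '\0';
			printf("available: %s", avail);
		}
		close(fd);
	}

	fd = open("/sys/kernel/tracing/current_tracer", O_WRONLY);
	if (fd < 0 || write(fd, "nop", 3) < 0)
		perror("set current_tracer");
	if (fd >= 0)
		close(fd);
	return 0;
}
#endif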
6674 
6675 static ssize_t
6676 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6677 		   size_t cnt, loff_t *ppos)
6678 {
6679 	char buf[64];
6680 	int r;
6681 
6682 	r = snprintf(buf, sizeof(buf), "%ld\n",
6683 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6684 	if (r > sizeof(buf))
6685 		r = sizeof(buf);
6686 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6687 }
6688 
6689 static ssize_t
6690 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6691 		    size_t cnt, loff_t *ppos)
6692 {
6693 	unsigned long val;
6694 	int ret;
6695 
6696 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6697 	if (ret)
6698 		return ret;
6699 
6700 	*ptr = val * 1000;
6701 
6702 	return cnt;
6703 }
6704 
6705 static ssize_t
6706 tracing_thresh_read(struct file *filp, char __user *ubuf,
6707 		    size_t cnt, loff_t *ppos)
6708 {
6709 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6710 }
6711 
6712 static ssize_t
6713 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6714 		     size_t cnt, loff_t *ppos)
6715 {
6716 	struct trace_array *tr = filp->private_data;
6717 	int ret;
6718 
6719 	mutex_lock(&trace_types_lock);
6720 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6721 	if (ret < 0)
6722 		goto out;
6723 
6724 	if (tr->current_trace->update_thresh) {
6725 		ret = tr->current_trace->update_thresh(tr);
6726 		if (ret < 0)
6727 			goto out;
6728 	}
6729 
6730 	ret = cnt;
6731 out:
6732 	mutex_unlock(&trace_types_lock);
6733 
6734 	return ret;
6735 }
6736 
6737 #ifdef CONFIG_TRACER_MAX_TRACE
6738 
6739 static ssize_t
6740 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6741 		     size_t cnt, loff_t *ppos)
6742 {
6743 	struct trace_array *tr = filp->private_data;
6744 
6745 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6746 }
6747 
6748 static ssize_t
6749 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6750 		      size_t cnt, loff_t *ppos)
6751 {
6752 	struct trace_array *tr = filp->private_data;
6753 
6754 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6755 }
6756 
6757 #endif
6758 
6759 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6760 {
6761 	if (cpu == RING_BUFFER_ALL_CPUS) {
6762 		if (cpumask_empty(tr->pipe_cpumask)) {
6763 			cpumask_setall(tr->pipe_cpumask);
6764 			return 0;
6765 		}
6766 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6767 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6768 		return 0;
6769 	}
6770 	return -EBUSY;
6771 }
6772 
6773 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6774 {
6775 	if (cpu == RING_BUFFER_ALL_CPUS) {
6776 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6777 		cpumask_clear(tr->pipe_cpumask);
6778 	} else {
6779 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6780 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6781 	}
6782 }
6783 
6784 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6785 {
6786 	struct trace_array *tr = inode->i_private;
6787 	struct trace_iterator *iter;
6788 	int cpu;
6789 	int ret;
6790 
6791 	ret = tracing_check_open_get_tr(tr);
6792 	if (ret)
6793 		return ret;
6794 
6795 	mutex_lock(&trace_types_lock);
6796 	cpu = tracing_get_cpu(inode);
6797 	ret = open_pipe_on_cpu(tr, cpu);
6798 	if (ret)
6799 		goto fail_pipe_on_cpu;
6800 
6801 	/* create a buffer to store the information to pass to userspace */
6802 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6803 	if (!iter) {
6804 		ret = -ENOMEM;
6805 		goto fail_alloc_iter;
6806 	}
6807 
6808 	trace_seq_init(&iter->seq);
6809 	iter->trace = tr->current_trace;
6810 
6811 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6812 		ret = -ENOMEM;
6813 		goto fail;
6814 	}
6815 
6816 	/* trace pipe does not show start of buffer */
6817 	cpumask_setall(iter->started);
6818 
6819 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6820 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6821 
6822 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6823 	if (trace_clocks[tr->clock_id].in_ns)
6824 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6825 
6826 	iter->tr = tr;
6827 	iter->array_buffer = &tr->array_buffer;
6828 	iter->cpu_file = cpu;
6829 	mutex_init(&iter->mutex);
6830 	filp->private_data = iter;
6831 
6832 	if (iter->trace->pipe_open)
6833 		iter->trace->pipe_open(iter);
6834 
6835 	nonseekable_open(inode, filp);
6836 
6837 	tr->trace_ref++;
6838 
6839 	mutex_unlock(&trace_types_lock);
6840 	return ret;
6841 
6842 fail:
6843 	kfree(iter);
6844 fail_alloc_iter:
6845 	close_pipe_on_cpu(tr, cpu);
6846 fail_pipe_on_cpu:
6847 	__trace_array_put(tr);
6848 	mutex_unlock(&trace_types_lock);
6849 	return ret;
6850 }
6851 
6852 static int tracing_release_pipe(struct inode *inode, struct file *file)
6853 {
6854 	struct trace_iterator *iter = file->private_data;
6855 	struct trace_array *tr = inode->i_private;
6856 
6857 	mutex_lock(&trace_types_lock);
6858 
6859 	tr->trace_ref--;
6860 
6861 	if (iter->trace->pipe_close)
6862 		iter->trace->pipe_close(iter);
6863 	close_pipe_on_cpu(tr, iter->cpu_file);
6864 	mutex_unlock(&trace_types_lock);
6865 
6866 	free_trace_iter_content(iter);
6867 	kfree(iter);
6868 
6869 	trace_array_put(tr);
6870 
6871 	return 0;
6872 }
6873 
6874 static __poll_t
6875 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6876 {
6877 	struct trace_array *tr = iter->tr;
6878 
6879 	/* Iterators are static, they should be filled or empty */
6880 	if (trace_buffer_iter(iter, iter->cpu_file))
6881 		return EPOLLIN | EPOLLRDNORM;
6882 
6883 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6884 		/*
6885 		 * Always select as readable when in blocking mode
6886 		 */
6887 		return EPOLLIN | EPOLLRDNORM;
6888 	else
6889 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6890 					     filp, poll_table, iter->tr->buffer_percent);
6891 }
6892 
6893 static __poll_t
6894 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6895 {
6896 	struct trace_iterator *iter = filp->private_data;
6897 
6898 	return trace_poll(iter, filp, poll_table);
6899 }
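/*
 * Illustrative userspace sketch (not part of this file): the consumer side
 * of the poll support above -- wait for trace_pipe to become readable and
 * then drain it. Assumes tracefs at /sys/kernel/tracing; the one second
 * poll timeout is arbitrary.
 */
#if 0
#include <fcntl.h>
#include <poll.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	char buf[4096];
	struct pollfd pfd;
	ssize_t n;

	pfd.fd = open("/sys/kernel/tracing/trace_pipe", O_RDONLY | O_NONBLOCK);
	if (pfd.fd < 0) {
		perror("open trace_pipe");
		return 1;
	}
	pfd.events = POLLIN;

	for (;;) {
		int ret = poll(&pfd, 1, 1000);	/* 1s timeout */

		if (ret < 0)
			break;
		if (ret == 0 || !(pfd.revents & POLLIN))
			continue;
		n = read(pfd.fd, buf, sizeof(buf));
		if (n > 0)
			fwrite(buf, 1, n, stdout);
	}
	close(pfd.fd);
	return 0;
}
#endif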
6900 
6901 /* Must be called with iter->mutex held. */
6902 static int tracing_wait_pipe(struct file *filp)
6903 {
6904 	struct trace_iterator *iter = filp->private_data;
6905 	int ret;
6906 
6907 	while (trace_empty(iter)) {
6908 
6909 		if ((filp->f_flags & O_NONBLOCK)) {
6910 			return -EAGAIN;
6911 		}
6912 
6913 		/*
6914 		 * We block until there is something to read, or until tracing
6915 		 * is disabled after we have already read something. We still
6916 		 * block while tracing is disabled if nothing has been read:
6917 		 * this allows a user to cat this file and then enable tracing.
6918 		 * Once something has been read, we give an EOF when tracing is disabled again.
6919 		 *
6920 		 * iter->pos will be 0 if we haven't read anything.
6921 		 */
6922 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6923 			break;
6924 
6925 		mutex_unlock(&iter->mutex);
6926 
6927 		ret = wait_on_pipe(iter, 0);
6928 
6929 		mutex_lock(&iter->mutex);
6930 
6931 		if (ret)
6932 			return ret;
6933 	}
6934 
6935 	return 1;
6936 }
6937 
6938 /*
6939  * Consumer reader.
6940  */
6941 static ssize_t
6942 tracing_read_pipe(struct file *filp, char __user *ubuf,
6943 		  size_t cnt, loff_t *ppos)
6944 {
6945 	struct trace_iterator *iter = filp->private_data;
6946 	ssize_t sret;
6947 
6948 	/*
6949 	 * Avoid more than one consumer on a single file descriptor
6950 	 * This is just a matter of traces coherency, the ring buffer itself
6951 	 * is protected.
6952 	 */
6953 	mutex_lock(&iter->mutex);
6954 
6955 	/* return any leftover data */
6956 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6957 	if (sret != -EBUSY)
6958 		goto out;
6959 
6960 	trace_seq_init(&iter->seq);
6961 
6962 	if (iter->trace->read) {
6963 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6964 		if (sret)
6965 			goto out;
6966 	}
6967 
6968 waitagain:
6969 	sret = tracing_wait_pipe(filp);
6970 	if (sret <= 0)
6971 		goto out;
6972 
6973 	/* stop when tracing is finished */
6974 	if (trace_empty(iter)) {
6975 		sret = 0;
6976 		goto out;
6977 	}
6978 
6979 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6980 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6981 
6982 	/* reset all but tr, trace, and overruns */
6983 	trace_iterator_reset(iter);
6984 	cpumask_clear(iter->started);
6985 	trace_seq_init(&iter->seq);
6986 
6987 	trace_event_read_lock();
6988 	trace_access_lock(iter->cpu_file);
6989 	while (trace_find_next_entry_inc(iter) != NULL) {
6990 		enum print_line_t ret;
6991 		int save_len = iter->seq.seq.len;
6992 
6993 		ret = print_trace_line(iter);
6994 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6995 			/*
6996 			 * If one print_trace_line() fills the entire trace_seq in
6997 			 * one shot, trace_seq_to_user() will return -EBUSY because
6998 			 * save_len == 0. In this case we need to consume it, or the
6999 			 * loop will peek at this same event next time and spin forever.
7000 			 */
7001 			if (save_len == 0) {
7002 				iter->seq.full = 0;
7003 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
7004 				trace_consume(iter);
7005 				break;
7006 			}
7007 
7008 			/* In other cases, don't print partial lines */
7009 			iter->seq.seq.len = save_len;
7010 			break;
7011 		}
7012 		if (ret != TRACE_TYPE_NO_CONSUME)
7013 			trace_consume(iter);
7014 
7015 		if (trace_seq_used(&iter->seq) >= cnt)
7016 			break;
7017 
7018 		/*
7019 		 * Setting the full flag means we reached the trace_seq buffer
7020 		 * size and we should leave by partial output condition above.
7021 		 * One of the trace_seq_* functions is not used properly.
7022 		 */
7023 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
7024 			  iter->ent->type);
7025 	}
7026 	trace_access_unlock(iter->cpu_file);
7027 	trace_event_read_unlock();
7028 
7029 	/* Now copy what we have to the user */
7030 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
7031 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
7032 		trace_seq_init(&iter->seq);
7033 
7034 	/*
7035 	 * If there was nothing to send to user, in spite of consuming trace
7036 	 * entries, go back to wait for more entries.
7037 	 */
7038 	if (sret == -EBUSY)
7039 		goto waitagain;
7040 
7041 out:
7042 	mutex_unlock(&iter->mutex);
7043 
7044 	return sret;
7045 }
7046 
7047 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
7048 				     unsigned int idx)
7049 {
7050 	__free_page(spd->pages[idx]);
7051 }
7052 
7053 static size_t
7054 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
7055 {
7056 	size_t count;
7057 	int save_len;
7058 	int ret;
7059 
7060 	/* Seq buffer is page-sized, exactly what we need. */
7061 	for (;;) {
7062 		save_len = iter->seq.seq.len;
7063 		ret = print_trace_line(iter);
7064 
7065 		if (trace_seq_has_overflowed(&iter->seq)) {
7066 			iter->seq.seq.len = save_len;
7067 			break;
7068 		}
7069 
7070 		/*
7071 		 * This should not be hit, because it should only
7072 		 * be set if the iter->seq overflowed. But check it
7073 		 * anyway to be safe.
7074 		 */
7075 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
7076 			iter->seq.seq.len = save_len;
7077 			break;
7078 		}
7079 
7080 		count = trace_seq_used(&iter->seq) - save_len;
7081 		if (rem < count) {
7082 			rem = 0;
7083 			iter->seq.seq.len = save_len;
7084 			break;
7085 		}
7086 
7087 		if (ret != TRACE_TYPE_NO_CONSUME)
7088 			trace_consume(iter);
7089 		rem -= count;
7090 		if (!trace_find_next_entry_inc(iter))	{
7091 			rem = 0;
7092 			iter->ent = NULL;
7093 			break;
7094 		}
7095 	}
7096 
7097 	return rem;
7098 }
7099 
7100 static ssize_t tracing_splice_read_pipe(struct file *filp,
7101 					loff_t *ppos,
7102 					struct pipe_inode_info *pipe,
7103 					size_t len,
7104 					unsigned int flags)
7105 {
7106 	struct page *pages_def[PIPE_DEF_BUFFERS];
7107 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
7108 	struct trace_iterator *iter = filp->private_data;
7109 	struct splice_pipe_desc spd = {
7110 		.pages		= pages_def,
7111 		.partial	= partial_def,
7112 		.nr_pages	= 0, /* This gets updated below. */
7113 		.nr_pages_max	= PIPE_DEF_BUFFERS,
7114 		.ops		= &default_pipe_buf_ops,
7115 		.spd_release	= tracing_spd_release_pipe,
7116 	};
7117 	ssize_t ret;
7118 	size_t rem;
7119 	unsigned int i;
7120 
7121 	if (splice_grow_spd(pipe, &spd))
7122 		return -ENOMEM;
7123 
7124 	mutex_lock(&iter->mutex);
7125 
7126 	if (iter->trace->splice_read) {
7127 		ret = iter->trace->splice_read(iter, filp,
7128 					       ppos, pipe, len, flags);
7129 		if (ret)
7130 			goto out_err;
7131 	}
7132 
7133 	ret = tracing_wait_pipe(filp);
7134 	if (ret <= 0)
7135 		goto out_err;
7136 
7137 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
7138 		ret = -EFAULT;
7139 		goto out_err;
7140 	}
7141 
7142 	trace_event_read_lock();
7143 	trace_access_lock(iter->cpu_file);
7144 
7145 	/* Fill as many pages as possible. */
7146 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
7147 		spd.pages[i] = alloc_page(GFP_KERNEL);
7148 		if (!spd.pages[i])
7149 			break;
7150 
7151 		rem = tracing_fill_pipe_page(rem, iter);
7152 
7153 		/* Copy the data into the page, so we can start over. */
7154 		ret = trace_seq_to_buffer(&iter->seq,
7155 					  page_address(spd.pages[i]),
7156 					  trace_seq_used(&iter->seq));
7157 		if (ret < 0) {
7158 			__free_page(spd.pages[i]);
7159 			break;
7160 		}
7161 		spd.partial[i].offset = 0;
7162 		spd.partial[i].len = trace_seq_used(&iter->seq);
7163 
7164 		trace_seq_init(&iter->seq);
7165 	}
7166 
7167 	trace_access_unlock(iter->cpu_file);
7168 	trace_event_read_unlock();
7169 	mutex_unlock(&iter->mutex);
7170 
7171 	spd.nr_pages = i;
7172 
7173 	if (i)
7174 		ret = splice_to_pipe(pipe, &spd);
7175 	else
7176 		ret = 0;
7177 out:
7178 	splice_shrink_spd(&spd);
7179 	return ret;
7180 
7181 out_err:
7182 	mutex_unlock(&iter->mutex);
7183 	goto out;
7184 }
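/*
 * Illustrative userspace sketch (not part of this file): the other end of
 * the splice support above -- move trace_pipe data into a log file without
 * copying it through userspace, via an intermediate pipe as splice(2)
 * requires. Paths and sizes are arbitrary examples.
 */
#if 0
#define _GNU_SOURCE		/* for splice() */
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int main(void)
{
	int pfd[2];
	int in = open("/sys/kernel/tracing/trace_pipe", O_RDONLY);
	int out = open("/tmp/trace.log", O_WRONLY | O_CREAT | O_TRUNC, 0644);

	if (in < 0 || out < 0 || pipe(pfd) < 0) {
		perror("setup");
		return 1;
	}

	for (;;) {
		ssize_t n = splice(in, NULL, pfd[1], NULL, 4096, 0);

		if (n <= 0)
			break;
		if (splice(pfd[0], NULL, out, NULL, n, 0) < 0)
			break;
	}
	close(in);
	close(out);
	return 0;
}
#endif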
7185 
7186 static ssize_t
7187 tracing_entries_read(struct file *filp, char __user *ubuf,
7188 		     size_t cnt, loff_t *ppos)
7189 {
7190 	struct inode *inode = file_inode(filp);
7191 	struct trace_array *tr = inode->i_private;
7192 	int cpu = tracing_get_cpu(inode);
7193 	char buf[64];
7194 	int r = 0;
7195 	ssize_t ret;
7196 
7197 	mutex_lock(&trace_types_lock);
7198 
7199 	if (cpu == RING_BUFFER_ALL_CPUS) {
7200 		int cpu, buf_size_same;
7201 		unsigned long size;
7202 
7203 		size = 0;
7204 		buf_size_same = 1;
7205 		/* check if all cpu sizes are the same */
7206 		for_each_tracing_cpu(cpu) {
7207 			/* fill in the size from first enabled cpu */
7208 			if (size == 0)
7209 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
7210 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
7211 				buf_size_same = 0;
7212 				break;
7213 			}
7214 		}
7215 
7216 		if (buf_size_same) {
7217 			if (!tr->ring_buffer_expanded)
7218 				r = sprintf(buf, "%lu (expanded: %lu)\n",
7219 					    size >> 10,
7220 					    trace_buf_size >> 10);
7221 			else
7222 				r = sprintf(buf, "%lu\n", size >> 10);
7223 		} else
7224 			r = sprintf(buf, "X\n");
7225 	} else
7226 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
7227 
7228 	mutex_unlock(&trace_types_lock);
7229 
7230 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7231 	return ret;
7232 }
7233 
7234 static ssize_t
7235 tracing_entries_write(struct file *filp, const char __user *ubuf,
7236 		      size_t cnt, loff_t *ppos)
7237 {
7238 	struct inode *inode = file_inode(filp);
7239 	struct trace_array *tr = inode->i_private;
7240 	unsigned long val;
7241 	int ret;
7242 
7243 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7244 	if (ret)
7245 		return ret;
7246 
7247 	/* must have at least 1 entry */
7248 	if (!val)
7249 		return -EINVAL;
7250 
7251 	/* value is in KB */
7252 	val <<= 10;
7253 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
7254 	if (ret < 0)
7255 		return ret;
7256 
7257 	*ppos += cnt;
7258 
7259 	return cnt;
7260 }
7261 
7262 static ssize_t
7263 tracing_total_entries_read(struct file *filp, char __user *ubuf,
7264 				size_t cnt, loff_t *ppos)
7265 {
7266 	struct trace_array *tr = filp->private_data;
7267 	char buf[64];
7268 	int r, cpu;
7269 	unsigned long size = 0, expanded_size = 0;
7270 
7271 	mutex_lock(&trace_types_lock);
7272 	for_each_tracing_cpu(cpu) {
7273 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
7274 		if (!tr->ring_buffer_expanded)
7275 			expanded_size += trace_buf_size >> 10;
7276 	}
7277 	if (tr->ring_buffer_expanded)
7278 		r = sprintf(buf, "%lu\n", size);
7279 	else
7280 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
7281 	mutex_unlock(&trace_types_lock);
7282 
7283 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
7284 }
7285 
7286 static ssize_t
7287 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7288 			  size_t cnt, loff_t *ppos)
7289 {
7290 	/*
7291 	 * There is no need to read what the user has written; this function
7292 	 * only exists so that using "echo" on this file does not fail.
7293 	 */
7294 
7295 	*ppos += cnt;
7296 
7297 	return cnt;
7298 }
7299 
7300 static int
7301 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7302 {
7303 	struct trace_array *tr = inode->i_private;
7304 
7305 	/* disable tracing ? */
7306 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7307 		tracer_tracing_off(tr);
7308 	/* resize the ring buffer to 0 */
7309 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7310 
7311 	trace_array_put(tr);
7312 
7313 	return 0;
7314 }
7315 
7316 #define TRACE_MARKER_MAX_SIZE		4096
7317 
7318 static ssize_t
7319 tracing_mark_write(struct file *filp, const char __user *ubuf,
7320 					size_t cnt, loff_t *fpos)
7321 {
7322 	struct trace_array *tr = filp->private_data;
7323 	struct ring_buffer_event *event;
7324 	enum event_trigger_type tt = ETT_NONE;
7325 	struct trace_buffer *buffer;
7326 	struct print_entry *entry;
7327 	int meta_size;
7328 	ssize_t written;
7329 	size_t size;
7330 	int len;
7331 
7332 /* Used in tracing_mark_raw_write() as well */
7333 #define FAULTED_STR "<faulted>"
7334 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7335 
7336 	if (tracing_disabled)
7337 		return -EINVAL;
7338 
7339 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7340 		return -EINVAL;
7341 
7342 	if ((ssize_t)cnt < 0)
7343 		return -EINVAL;
7344 
7345 	if (cnt > TRACE_MARKER_MAX_SIZE)
7346 		cnt = TRACE_MARKER_MAX_SIZE;
7347 
7348 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7349  again:
7350 	size = cnt + meta_size;
7351 
7352 	/* If less than "<faulted>", then make sure we can still add that */
7353 	if (cnt < FAULTED_SIZE)
7354 		size += FAULTED_SIZE - cnt;
7355 
7356 	buffer = tr->array_buffer.buffer;
7357 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7358 					    tracing_gen_ctx());
7359 	if (unlikely(!event)) {
7360 		/*
7361 		 * If the size was greater than what was allowed, then
7362 		 * make it smaller and try again.
7363 		 */
7364 		if (size > ring_buffer_max_event_size(buffer)) {
7365 			/* a size based on cnt < FAULTED_SIZE should never exceed the max */
7366 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7367 				return -EBADF;
7368 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7369 			/* The above should only happen once */
7370 			if (WARN_ON_ONCE(cnt + meta_size == size))
7371 				return -EBADF;
7372 			goto again;
7373 		}
7374 
7375 		/* Ring buffer disabled, return as if not open for write */
7376 		return -EBADF;
7377 	}
7378 
7379 	entry = ring_buffer_event_data(event);
7380 	entry->ip = _THIS_IP_;
7381 
7382 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7383 	if (len) {
7384 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7385 		cnt = FAULTED_SIZE;
7386 		written = -EFAULT;
7387 	} else
7388 		written = cnt;
7389 
7390 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7391 		/* do not add \n before testing triggers, but add \0 */
7392 		entry->buf[cnt] = '\0';
7393 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7394 	}
7395 
7396 	if (entry->buf[cnt - 1] != '\n') {
7397 		entry->buf[cnt] = '\n';
7398 		entry->buf[cnt + 1] = '\0';
7399 	} else
7400 		entry->buf[cnt] = '\0';
7401 
7402 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7403 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7404 	__buffer_unlock_commit(buffer, event);
7405 
7406 	if (tt)
7407 		event_triggers_post_call(tr->trace_marker_file, tt);
7408 
7409 	return written;
7410 }
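/*
 * Illustrative userspace sketch (not part of this file): emitting a
 * message into the trace buffer through the trace_marker write handler
 * above. Assumes tracefs at /sys/kernel/tracing; the message text is
 * arbitrary. The entry appears in the trace output as "tracing_mark_write".
 */
#if 0
#include <fcntl.h>
#include <stdio.h>
#include <string.h>
#include <unistd.h>

int main(void)
{
	char msg[64];
	int fd = open("/sys/kernel/tracing/trace_marker", O_WRONLY);
	int len;

	if (fd < 0) {
		perror("open trace_marker");
		return 1;
	}
	len = snprintf(msg, sizeof(msg), "hello from pid %d\n", getpid());
	if (write(fd, msg, len) < 0)
		perror("write trace_marker");
	close(fd);
	return 0;
}
#endif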
7411 
7412 static ssize_t
7413 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7414 					size_t cnt, loff_t *fpos)
7415 {
7416 	struct trace_array *tr = filp->private_data;
7417 	struct ring_buffer_event *event;
7418 	struct trace_buffer *buffer;
7419 	struct raw_data_entry *entry;
7420 	ssize_t written;
7421 	int size;
7422 	int len;
7423 
7424 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7425 
7426 	if (tracing_disabled)
7427 		return -EINVAL;
7428 
7429 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7430 		return -EINVAL;
7431 
7432 	/* The marker must at least have a tag id */
7433 	if (cnt < sizeof(unsigned int))
7434 		return -EINVAL;
7435 
7436 	size = sizeof(*entry) + cnt;
7437 	if (cnt < FAULT_SIZE_ID)
7438 		size += FAULT_SIZE_ID - cnt;
7439 
7440 	buffer = tr->array_buffer.buffer;
7441 
7442 	if (size > ring_buffer_max_event_size(buffer))
7443 		return -EINVAL;
7444 
7445 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7446 					    tracing_gen_ctx());
7447 	if (!event)
7448 		/* Ring buffer disabled, return as if not open for write */
7449 		return -EBADF;
7450 
7451 	entry = ring_buffer_event_data(event);
7452 
7453 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7454 	if (len) {
7455 		entry->id = -1;
7456 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7457 		written = -EFAULT;
7458 	} else
7459 		written = cnt;
7460 
7461 	__buffer_unlock_commit(buffer, event);
7462 
7463 	return written;
7464 }
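/*
 * Example (illustrative sketch, not part of the kernel build): as checked
 * above, a raw marker must begin with an unsigned int tag id, optionally
 * followed by payload bytes.  The id value here is made up for illustration.
 *
 *	#include <fcntl.h>
 *	#include <string.h>
 *	#include <unistd.h>
 *
 *	static void emit_raw_marker(void)
 *	{
 *		unsigned int id = 42;	// hypothetical tag id
 *		char buf[sizeof(id) + 8];
 *		int fd;
 *
 *		memcpy(buf, &id, sizeof(id));
 *		memcpy(buf + sizeof(id), "payload", 8);
 *
 *		fd = open("/sys/kernel/tracing/trace_marker_raw", O_WRONLY);
 *		if (fd < 0)
 *			return;
 *		write(fd, buf, sizeof(buf));
 *		close(fd);
 *	}
 */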
7465 
7466 static int tracing_clock_show(struct seq_file *m, void *v)
7467 {
7468 	struct trace_array *tr = m->private;
7469 	int i;
7470 
7471 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7472 		seq_printf(m,
7473 			"%s%s%s%s", i ? " " : "",
7474 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7475 			i == tr->clock_id ? "]" : "");
7476 	seq_putc(m, '\n');
7477 
7478 	return 0;
7479 }
7480 
7481 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7482 {
7483 	int i;
7484 
7485 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7486 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7487 			break;
7488 	}
7489 	if (i == ARRAY_SIZE(trace_clocks))
7490 		return -EINVAL;
7491 
7492 	mutex_lock(&trace_types_lock);
7493 
7494 	tr->clock_id = i;
7495 
7496 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7497 
7498 	/*
7499 	 * New clock may not be consistent with the previous clock.
7500 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7501 	 */
7502 	tracing_reset_online_cpus(&tr->array_buffer);
7503 
7504 #ifdef CONFIG_TRACER_MAX_TRACE
7505 	if (tr->max_buffer.buffer)
7506 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7507 	tracing_reset_online_cpus(&tr->max_buffer);
7508 #endif
7509 
7510 	mutex_unlock(&trace_types_lock);
7511 
7512 	return 0;
7513 }
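/*
 * Example (illustrative sketch): the clock names listed by
 * tracing_clock_show() can be selected from user space by writing one of
 * them to the trace_clock file, which reaches this function via
 * tracing_clock_write() below.
 *
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static void select_global_clock(void)
 *	{
 *		int fd = open("/sys/kernel/tracing/trace_clock", O_WRONLY);
 *
 *		if (fd < 0)
 *			return;
 *		write(fd, "global", 6);
 *		close(fd);
 *	}
 */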
7514 
7515 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7516 				   size_t cnt, loff_t *fpos)
7517 {
7518 	struct seq_file *m = filp->private_data;
7519 	struct trace_array *tr = m->private;
7520 	char buf[64];
7521 	const char *clockstr;
7522 	int ret;
7523 
7524 	if (cnt >= sizeof(buf))
7525 		return -EINVAL;
7526 
7527 	if (copy_from_user(buf, ubuf, cnt))
7528 		return -EFAULT;
7529 
7530 	buf[cnt] = 0;
7531 
7532 	clockstr = strstrip(buf);
7533 
7534 	ret = tracing_set_clock(tr, clockstr);
7535 	if (ret)
7536 		return ret;
7537 
7538 	*fpos += cnt;
7539 
7540 	return cnt;
7541 }
7542 
7543 static int tracing_clock_open(struct inode *inode, struct file *file)
7544 {
7545 	struct trace_array *tr = inode->i_private;
7546 	int ret;
7547 
7548 	ret = tracing_check_open_get_tr(tr);
7549 	if (ret)
7550 		return ret;
7551 
7552 	ret = single_open(file, tracing_clock_show, inode->i_private);
7553 	if (ret < 0)
7554 		trace_array_put(tr);
7555 
7556 	return ret;
7557 }
7558 
7559 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7560 {
7561 	struct trace_array *tr = m->private;
7562 
7563 	mutex_lock(&trace_types_lock);
7564 
7565 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7566 		seq_puts(m, "delta [absolute]\n");
7567 	else
7568 		seq_puts(m, "[delta] absolute\n");
7569 
7570 	mutex_unlock(&trace_types_lock);
7571 
7572 	return 0;
7573 }
7574 
7575 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7576 {
7577 	struct trace_array *tr = inode->i_private;
7578 	int ret;
7579 
7580 	ret = tracing_check_open_get_tr(tr);
7581 	if (ret)
7582 		return ret;
7583 
7584 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7585 	if (ret < 0)
7586 		trace_array_put(tr);
7587 
7588 	return ret;
7589 }
7590 
7591 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7592 {
7593 	if (rbe == this_cpu_read(trace_buffered_event))
7594 		return ring_buffer_time_stamp(buffer);
7595 
7596 	return ring_buffer_event_time_stamp(buffer, rbe);
7597 }
7598 
7599 /*
7600  * Enable or disable use of the per-CPU trace_buffered_event when possible.
7601  */
7602 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7603 {
7604 	int ret = 0;
7605 
7606 	mutex_lock(&trace_types_lock);
7607 
7608 	if (set && tr->no_filter_buffering_ref++)
7609 		goto out;
7610 
7611 	if (!set) {
7612 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7613 			ret = -EINVAL;
7614 			goto out;
7615 		}
7616 
7617 		--tr->no_filter_buffering_ref;
7618 	}
7619  out:
7620 	mutex_unlock(&trace_types_lock);
7621 
7622 	return ret;
7623 }
7624 
7625 struct ftrace_buffer_info {
7626 	struct trace_iterator	iter;
7627 	void			*spare;
7628 	unsigned int		spare_cpu;
7629 	unsigned int		spare_size;
7630 	unsigned int		read;
7631 };
7632 
7633 #ifdef CONFIG_TRACER_SNAPSHOT
7634 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7635 {
7636 	struct trace_array *tr = inode->i_private;
7637 	struct trace_iterator *iter;
7638 	struct seq_file *m;
7639 	int ret;
7640 
7641 	ret = tracing_check_open_get_tr(tr);
7642 	if (ret)
7643 		return ret;
7644 
7645 	if (file->f_mode & FMODE_READ) {
7646 		iter = __tracing_open(inode, file, true);
7647 		if (IS_ERR(iter))
7648 			ret = PTR_ERR(iter);
7649 	} else {
7650 		/* Writes still need the seq_file to hold the private data */
7651 		ret = -ENOMEM;
7652 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7653 		if (!m)
7654 			goto out;
7655 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7656 		if (!iter) {
7657 			kfree(m);
7658 			goto out;
7659 		}
7660 		ret = 0;
7661 
7662 		iter->tr = tr;
7663 		iter->array_buffer = &tr->max_buffer;
7664 		iter->cpu_file = tracing_get_cpu(inode);
7665 		m->private = iter;
7666 		file->private_data = m;
7667 	}
7668 out:
7669 	if (ret < 0)
7670 		trace_array_put(tr);
7671 
7672 	return ret;
7673 }
7674 
7675 static void tracing_swap_cpu_buffer(void *tr)
7676 {
7677 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7678 }
7679 
7680 static ssize_t
7681 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7682 		       loff_t *ppos)
7683 {
7684 	struct seq_file *m = filp->private_data;
7685 	struct trace_iterator *iter = m->private;
7686 	struct trace_array *tr = iter->tr;
7687 	unsigned long val;
7688 	int ret;
7689 
7690 	ret = tracing_update_buffers(tr);
7691 	if (ret < 0)
7692 		return ret;
7693 
7694 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7695 	if (ret)
7696 		return ret;
7697 
7698 	mutex_lock(&trace_types_lock);
7699 
7700 	if (tr->current_trace->use_max_tr) {
7701 		ret = -EBUSY;
7702 		goto out;
7703 	}
7704 
7705 	local_irq_disable();
7706 	arch_spin_lock(&tr->max_lock);
7707 	if (tr->cond_snapshot)
7708 		ret = -EBUSY;
7709 	arch_spin_unlock(&tr->max_lock);
7710 	local_irq_enable();
7711 	if (ret)
7712 		goto out;
7713 
7714 	switch (val) {
7715 	case 0:
7716 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7717 			ret = -EINVAL;
7718 			break;
7719 		}
7720 		if (tr->allocated_snapshot)
7721 			free_snapshot(tr);
7722 		break;
7723 	case 1:
7724 /* Only allow per-cpu swap if the ring buffer supports it */
7725 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7726 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7727 			ret = -EINVAL;
7728 			break;
7729 		}
7730 #endif
7731 		if (tr->allocated_snapshot)
7732 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7733 					&tr->array_buffer, iter->cpu_file);
7734 		else
7735 			ret = tracing_alloc_snapshot_instance(tr);
7736 		if (ret < 0)
7737 			break;
7738 		/* Now, we're going to swap */
7739 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7740 			local_irq_disable();
7741 			update_max_tr(tr, current, smp_processor_id(), NULL);
7742 			local_irq_enable();
7743 		} else {
7744 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7745 						 (void *)tr, 1);
7746 		}
7747 		break;
7748 	default:
7749 		if (tr->allocated_snapshot) {
7750 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7751 				tracing_reset_online_cpus(&tr->max_buffer);
7752 			else
7753 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7754 		}
7755 		break;
7756 	}
7757 
7758 	if (ret >= 0) {
7759 		*ppos += cnt;
7760 		ret = cnt;
7761 	}
7762 out:
7763 	mutex_unlock(&trace_types_lock);
7764 	return ret;
7765 }
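/*
 * Summary of the values handled by the switch above when written to the
 * snapshot file: 0 frees the allocated snapshot buffer (only valid for the
 * whole buffer, not a per-cpu file), 1 allocates or resizes the snapshot
 * buffer as needed and swaps it with the live buffer, and any other value
 * clears the snapshot buffer contents without freeing it.
 */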
7766 
7767 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7768 {
7769 	struct seq_file *m = file->private_data;
7770 	int ret;
7771 
7772 	ret = tracing_release(inode, file);
7773 
7774 	if (file->f_mode & FMODE_READ)
7775 		return ret;
7776 
7777 	/* If write only, the seq_file is just a stub */
7778 	if (m)
7779 		kfree(m->private);
7780 	kfree(m);
7781 
7782 	return 0;
7783 }
7784 
7785 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7786 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7787 				    size_t count, loff_t *ppos);
7788 static int tracing_buffers_release(struct inode *inode, struct file *file);
7789 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7790 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7791 
7792 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7793 {
7794 	struct ftrace_buffer_info *info;
7795 	int ret;
7796 
7797 	/* The following checks for tracefs lockdown */
7798 	ret = tracing_buffers_open(inode, filp);
7799 	if (ret < 0)
7800 		return ret;
7801 
7802 	info = filp->private_data;
7803 
7804 	if (info->iter.trace->use_max_tr) {
7805 		tracing_buffers_release(inode, filp);
7806 		return -EBUSY;
7807 	}
7808 
7809 	info->iter.snapshot = true;
7810 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7811 
7812 	return ret;
7813 }
7814 
7815 #endif /* CONFIG_TRACER_SNAPSHOT */
7816 
7817 
7818 static const struct file_operations tracing_thresh_fops = {
7819 	.open		= tracing_open_generic,
7820 	.read		= tracing_thresh_read,
7821 	.write		= tracing_thresh_write,
7822 	.llseek		= generic_file_llseek,
7823 };
7824 
7825 #ifdef CONFIG_TRACER_MAX_TRACE
7826 static const struct file_operations tracing_max_lat_fops = {
7827 	.open		= tracing_open_generic_tr,
7828 	.read		= tracing_max_lat_read,
7829 	.write		= tracing_max_lat_write,
7830 	.llseek		= generic_file_llseek,
7831 	.release	= tracing_release_generic_tr,
7832 };
7833 #endif
7834 
7835 static const struct file_operations set_tracer_fops = {
7836 	.open		= tracing_open_generic_tr,
7837 	.read		= tracing_set_trace_read,
7838 	.write		= tracing_set_trace_write,
7839 	.llseek		= generic_file_llseek,
7840 	.release	= tracing_release_generic_tr,
7841 };
7842 
7843 static const struct file_operations tracing_pipe_fops = {
7844 	.open		= tracing_open_pipe,
7845 	.poll		= tracing_poll_pipe,
7846 	.read		= tracing_read_pipe,
7847 	.splice_read	= tracing_splice_read_pipe,
7848 	.release	= tracing_release_pipe,
7849 	.llseek		= no_llseek,
7850 };
7851 
7852 static const struct file_operations tracing_entries_fops = {
7853 	.open		= tracing_open_generic_tr,
7854 	.read		= tracing_entries_read,
7855 	.write		= tracing_entries_write,
7856 	.llseek		= generic_file_llseek,
7857 	.release	= tracing_release_generic_tr,
7858 };
7859 
7860 static const struct file_operations tracing_total_entries_fops = {
7861 	.open		= tracing_open_generic_tr,
7862 	.read		= tracing_total_entries_read,
7863 	.llseek		= generic_file_llseek,
7864 	.release	= tracing_release_generic_tr,
7865 };
7866 
7867 static const struct file_operations tracing_free_buffer_fops = {
7868 	.open		= tracing_open_generic_tr,
7869 	.write		= tracing_free_buffer_write,
7870 	.release	= tracing_free_buffer_release,
7871 };
7872 
7873 static const struct file_operations tracing_mark_fops = {
7874 	.open		= tracing_mark_open,
7875 	.write		= tracing_mark_write,
7876 	.release	= tracing_release_generic_tr,
7877 };
7878 
7879 static const struct file_operations tracing_mark_raw_fops = {
7880 	.open		= tracing_mark_open,
7881 	.write		= tracing_mark_raw_write,
7882 	.release	= tracing_release_generic_tr,
7883 };
7884 
7885 static const struct file_operations trace_clock_fops = {
7886 	.open		= tracing_clock_open,
7887 	.read		= seq_read,
7888 	.llseek		= seq_lseek,
7889 	.release	= tracing_single_release_tr,
7890 	.write		= tracing_clock_write,
7891 };
7892 
7893 static const struct file_operations trace_time_stamp_mode_fops = {
7894 	.open		= tracing_time_stamp_mode_open,
7895 	.read		= seq_read,
7896 	.llseek		= seq_lseek,
7897 	.release	= tracing_single_release_tr,
7898 };
7899 
7900 #ifdef CONFIG_TRACER_SNAPSHOT
7901 static const struct file_operations snapshot_fops = {
7902 	.open		= tracing_snapshot_open,
7903 	.read		= seq_read,
7904 	.write		= tracing_snapshot_write,
7905 	.llseek		= tracing_lseek,
7906 	.release	= tracing_snapshot_release,
7907 };
7908 
7909 static const struct file_operations snapshot_raw_fops = {
7910 	.open		= snapshot_raw_open,
7911 	.read		= tracing_buffers_read,
7912 	.release	= tracing_buffers_release,
7913 	.splice_read	= tracing_buffers_splice_read,
7914 	.llseek		= no_llseek,
7915 };
7916 
7917 #endif /* CONFIG_TRACER_SNAPSHOT */
7918 
7919 /*
7920  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7921  * @filp: The active open file structure
7922  * @ubuf: The user space buffer containing the value to write
7923  * @cnt: The number of bytes to write
7924  * @ppos: The current "file" position
7925  *
7926  * This function implements the write interface for a struct trace_min_max_param.
7927  * The filp->private_data must point to a trace_min_max_param structure that
7928  * defines where to write the value, the min and the max acceptable values,
7929  * and a lock to protect the write.
7930  */
7931 static ssize_t
7932 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7933 {
7934 	struct trace_min_max_param *param = filp->private_data;
7935 	u64 val;
7936 	int err;
7937 
7938 	if (!param)
7939 		return -EFAULT;
7940 
7941 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7942 	if (err)
7943 		return err;
7944 
7945 	if (param->lock)
7946 		mutex_lock(param->lock);
7947 
7948 	if (param->min && val < *param->min)
7949 		err = -EINVAL;
7950 
7951 	if (param->max && val > *param->max)
7952 		err = -EINVAL;
7953 
7954 	if (!err)
7955 		*param->val = val;
7956 
7957 	if (param->lock)
7958 		mutex_unlock(param->lock);
7959 
7960 	if (err)
7961 		return err;
7962 
7963 	return cnt;
7964 }
7965 
7966 /*
7967  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7968  * @filp: The active open file structure
7969  * @ubuf: The userspace provided buffer to read value into
7970  * @cnt: The maximum number of bytes to read
7971  * @ppos: The current "file" position
7972  *
7973  * This function implements the read interface for a struct trace_min_max_param.
7974  * The filp->private_data must point to a trace_min_max_param struct with valid
7975  * data.
7976  */
7977 static ssize_t
7978 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7979 {
7980 	struct trace_min_max_param *param = filp->private_data;
7981 	char buf[U64_STR_SIZE];
7982 	int len;
7983 	u64 val;
7984 
7985 	if (!param)
7986 		return -EFAULT;
7987 
7988 	val = *param->val;
7989 
7990 	if (cnt > sizeof(buf))
7991 		cnt = sizeof(buf);
7992 
7993 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7994 
7995 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7996 }
7997 
7998 const struct file_operations trace_min_max_fops = {
7999 	.open		= tracing_open_generic,
8000 	.read		= trace_min_max_read,
8001 	.write		= trace_min_max_write,
8002 };
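/*
 * Example (illustrative sketch, hypothetical names): a user of
 * trace_min_max_fops is expected to pass a struct trace_min_max_param as the
 * file's private data, describing the value to update, its optional bounds
 * and an optional lock:
 *
 *	static u64 my_val;
 *	static u64 my_min = 1;
 *	static u64 my_max = 1000;
 *	static DEFINE_MUTEX(my_lock);
 *
 *	static struct trace_min_max_param my_param = {
 *		.lock	= &my_lock,
 *		.val	= &my_val,
 *		.min	= &my_min,
 *		.max	= &my_max,
 *	};
 *
 * and then, from the owning code's init path:
 *
 *	trace_create_file("my_knob", TRACE_MODE_WRITE, parent,
 *			  &my_param, &trace_min_max_fops);
 */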
8003 
8004 #define TRACING_LOG_ERRS_MAX	8
8005 #define TRACING_LOG_LOC_MAX	128
8006 
8007 #define CMD_PREFIX "  Command: "
8008 
8009 struct err_info {
8010 	const char	**errs;	/* ptr to loc-specific array of err strings */
8011 	u8		type;	/* index into errs -> specific err string */
8012 	u16		pos;	/* caret position */
8013 	u64		ts;
8014 };
8015 
8016 struct tracing_log_err {
8017 	struct list_head	list;
8018 	struct err_info		info;
8019 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
8020 	char			*cmd;                     /* what caused err */
8021 };
8022 
8023 static DEFINE_MUTEX(tracing_err_log_lock);
8024 
8025 static struct tracing_log_err *alloc_tracing_log_err(int len)
8026 {
8027 	struct tracing_log_err *err;
8028 
8029 	err = kzalloc(sizeof(*err), GFP_KERNEL);
8030 	if (!err)
8031 		return ERR_PTR(-ENOMEM);
8032 
8033 	err->cmd = kzalloc(len, GFP_KERNEL);
8034 	if (!err->cmd) {
8035 		kfree(err);
8036 		return ERR_PTR(-ENOMEM);
8037 	}
8038 
8039 	return err;
8040 }
8041 
8042 static void free_tracing_log_err(struct tracing_log_err *err)
8043 {
8044 	kfree(err->cmd);
8045 	kfree(err);
8046 }
8047 
8048 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
8049 						   int len)
8050 {
8051 	struct tracing_log_err *err;
8052 	char *cmd;
8053 
8054 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
8055 		err = alloc_tracing_log_err(len);
8056 		if (PTR_ERR(err) != -ENOMEM)
8057 			tr->n_err_log_entries++;
8058 
8059 		return err;
8060 	}
8061 	cmd = kzalloc(len, GFP_KERNEL);
8062 	if (!cmd)
8063 		return ERR_PTR(-ENOMEM);
8064 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
8065 	kfree(err->cmd);
8066 	err->cmd = cmd;
8067 	list_del(&err->list);
8068 
8069 	return err;
8070 }
8071 
8072 /**
8073  * err_pos - find the position of a string within a command for error careting
8074  * @cmd: The tracing command that caused the error
8075  * @str: The string to position the caret at within @cmd
8076  *
8077  * Finds the position of the first occurrence of @str within @cmd.  The
8078  * return value can be passed to tracing_log_err() for caret placement
8079  * within @cmd.
8080  *
8081  * Returns the index within @cmd of the first occurrence of @str or 0
8082  * if @str was not found.
8083  */
8084 unsigned int err_pos(char *cmd, const char *str)
8085 {
8086 	char *found;
8087 
8088 	if (WARN_ON(!strlen(cmd)))
8089 		return 0;
8090 
8091 	found = strstr(cmd, str);
8092 	if (found)
8093 		return found - cmd;
8094 
8095 	return 0;
8096 }
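/*
 * For example (illustrative): err_pos("hist:keys=pid", "keys") returns 5,
 * which places the caret under the 'k' of "keys" in the logged command.
 */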
8097 
8098 /**
8099  * tracing_log_err - write an error to the tracing error log
8100  * @tr: The associated trace array for the error (NULL for top level array)
8101  * @loc: A string describing where the error occurred
8102  * @cmd: The tracing command that caused the error
8103  * @errs: The array of loc-specific static error strings
8104  * @type: The index into errs[], which produces the specific static err string
8105  * @pos: The position the caret should be placed in the cmd
8106  *
8107  * Writes an error into tracing/error_log of the form:
8108  *
8109  * <loc>: error: <text>
8110  *   Command: <cmd>
8111  *              ^
8112  *
8113  * tracing/error_log is a small log file containing the last
8114  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
8115  * unless there has been a tracing error, and the error log can be
8116  * cleared and have its memory freed by writing the empty string to it
8117  * in truncation mode, i.e. echo > tracing/error_log.
8118  *
8119  * NOTE: the @errs array along with the @type param are used to
8120  * produce a static error string - this string is not copied and saved
8121  * when the error is logged - only a pointer to it is saved.  See
8122  * existing callers for examples of how static strings are typically
8123  * defined for use with tracing_log_err().
8124  */
8125 void tracing_log_err(struct trace_array *tr,
8126 		     const char *loc, const char *cmd,
8127 		     const char **errs, u8 type, u16 pos)
8128 {
8129 	struct tracing_log_err *err;
8130 	int len = 0;
8131 
8132 	if (!tr)
8133 		tr = &global_trace;
8134 
8135 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
8136 
8137 	mutex_lock(&tracing_err_log_lock);
8138 	err = get_tracing_log_err(tr, len);
8139 	if (PTR_ERR(err) == -ENOMEM) {
8140 		mutex_unlock(&tracing_err_log_lock);
8141 		return;
8142 	}
8143 
8144 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
8145 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
8146 
8147 	err->info.errs = errs;
8148 	err->info.type = type;
8149 	err->info.pos = pos;
8150 	err->info.ts = local_clock();
8151 
8152 	list_add_tail(&err->list, &tr->err_log);
8153 	mutex_unlock(&tracing_err_log_lock);
8154 }
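/*
 * Example (illustrative, hypothetical names): a typical caller keeps a
 * static array of error strings and logs one of them with the caret placed
 * under the offending part of the command:
 *
 *	static const char *my_cmd_errs[] = { "Invalid key", "Invalid value" };
 *
 *	tracing_log_err(tr, "my_cmd", cmd, my_cmd_errs, 0,
 *			err_pos(cmd, "key"));
 */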
8155 
8156 static void clear_tracing_err_log(struct trace_array *tr)
8157 {
8158 	struct tracing_log_err *err, *next;
8159 
8160 	mutex_lock(&tracing_err_log_lock);
8161 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
8162 		list_del(&err->list);
8163 		free_tracing_log_err(err);
8164 	}
8165 
8166 	tr->n_err_log_entries = 0;
8167 	mutex_unlock(&tracing_err_log_lock);
8168 }
8169 
8170 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
8171 {
8172 	struct trace_array *tr = m->private;
8173 
8174 	mutex_lock(&tracing_err_log_lock);
8175 
8176 	return seq_list_start(&tr->err_log, *pos);
8177 }
8178 
8179 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
8180 {
8181 	struct trace_array *tr = m->private;
8182 
8183 	return seq_list_next(v, &tr->err_log, pos);
8184 }
8185 
8186 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
8187 {
8188 	mutex_unlock(&tracing_err_log_lock);
8189 }
8190 
8191 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
8192 {
8193 	u16 i;
8194 
8195 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
8196 		seq_putc(m, ' ');
8197 	for (i = 0; i < pos; i++)
8198 		seq_putc(m, ' ');
8199 	seq_puts(m, "^\n");
8200 }
8201 
8202 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
8203 {
8204 	struct tracing_log_err *err = v;
8205 
8206 	if (err) {
8207 		const char *err_text = err->info.errs[err->info.type];
8208 		u64 sec = err->info.ts;
8209 		u32 nsec;
8210 
8211 		nsec = do_div(sec, NSEC_PER_SEC);
8212 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
8213 			   err->loc, err_text);
8214 		seq_printf(m, "%s", err->cmd);
8215 		tracing_err_log_show_pos(m, err->info.pos);
8216 	}
8217 
8218 	return 0;
8219 }
8220 
8221 static const struct seq_operations tracing_err_log_seq_ops = {
8222 	.start  = tracing_err_log_seq_start,
8223 	.next   = tracing_err_log_seq_next,
8224 	.stop   = tracing_err_log_seq_stop,
8225 	.show   = tracing_err_log_seq_show
8226 };
8227 
8228 static int tracing_err_log_open(struct inode *inode, struct file *file)
8229 {
8230 	struct trace_array *tr = inode->i_private;
8231 	int ret = 0;
8232 
8233 	ret = tracing_check_open_get_tr(tr);
8234 	if (ret)
8235 		return ret;
8236 
8237 	/* If this file was opened for write, then erase contents */
8238 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
8239 		clear_tracing_err_log(tr);
8240 
8241 	if (file->f_mode & FMODE_READ) {
8242 		ret = seq_open(file, &tracing_err_log_seq_ops);
8243 		if (!ret) {
8244 			struct seq_file *m = file->private_data;
8245 			m->private = tr;
8246 		} else {
8247 			trace_array_put(tr);
8248 		}
8249 	}
8250 	return ret;
8251 }
8252 
8253 static ssize_t tracing_err_log_write(struct file *file,
8254 				     const char __user *buffer,
8255 				     size_t count, loff_t *ppos)
8256 {
8257 	return count;
8258 }
8259 
8260 static int tracing_err_log_release(struct inode *inode, struct file *file)
8261 {
8262 	struct trace_array *tr = inode->i_private;
8263 
8264 	trace_array_put(tr);
8265 
8266 	if (file->f_mode & FMODE_READ)
8267 		seq_release(inode, file);
8268 
8269 	return 0;
8270 }
8271 
8272 static const struct file_operations tracing_err_log_fops = {
8273 	.open           = tracing_err_log_open,
8274 	.write		= tracing_err_log_write,
8275 	.read           = seq_read,
8276 	.llseek         = tracing_lseek,
8277 	.release        = tracing_err_log_release,
8278 };
8279 
8280 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8281 {
8282 	struct trace_array *tr = inode->i_private;
8283 	struct ftrace_buffer_info *info;
8284 	int ret;
8285 
8286 	ret = tracing_check_open_get_tr(tr);
8287 	if (ret)
8288 		return ret;
8289 
8290 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8291 	if (!info) {
8292 		trace_array_put(tr);
8293 		return -ENOMEM;
8294 	}
8295 
8296 	mutex_lock(&trace_types_lock);
8297 
8298 	info->iter.tr		= tr;
8299 	info->iter.cpu_file	= tracing_get_cpu(inode);
8300 	info->iter.trace	= tr->current_trace;
8301 	info->iter.array_buffer = &tr->array_buffer;
8302 	info->spare		= NULL;
8303 	/* Force reading ring buffer for first read */
8304 	info->read		= (unsigned int)-1;
8305 
8306 	filp->private_data = info;
8307 
8308 	tr->trace_ref++;
8309 
8310 	mutex_unlock(&trace_types_lock);
8311 
8312 	ret = nonseekable_open(inode, filp);
8313 	if (ret < 0)
8314 		trace_array_put(tr);
8315 
8316 	return ret;
8317 }
8318 
8319 static __poll_t
8320 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8321 {
8322 	struct ftrace_buffer_info *info = filp->private_data;
8323 	struct trace_iterator *iter = &info->iter;
8324 
8325 	return trace_poll(iter, filp, poll_table);
8326 }
8327 
8328 static ssize_t
8329 tracing_buffers_read(struct file *filp, char __user *ubuf,
8330 		     size_t count, loff_t *ppos)
8331 {
8332 	struct ftrace_buffer_info *info = filp->private_data;
8333 	struct trace_iterator *iter = &info->iter;
8334 	void *trace_data;
8335 	int page_size;
8336 	ssize_t ret = 0;
8337 	ssize_t size;
8338 
8339 	if (!count)
8340 		return 0;
8341 
8342 #ifdef CONFIG_TRACER_MAX_TRACE
8343 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8344 		return -EBUSY;
8345 #endif
8346 
8347 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8348 
8349 	/* Make sure the spare matches the current sub buffer size */
8350 	if (info->spare) {
8351 		if (page_size != info->spare_size) {
8352 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8353 						   info->spare_cpu, info->spare);
8354 			info->spare = NULL;
8355 		}
8356 	}
8357 
8358 	if (!info->spare) {
8359 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8360 							  iter->cpu_file);
8361 		if (IS_ERR(info->spare)) {
8362 			ret = PTR_ERR(info->spare);
8363 			info->spare = NULL;
8364 		} else {
8365 			info->spare_cpu = iter->cpu_file;
8366 			info->spare_size = page_size;
8367 		}
8368 	}
8369 	if (!info->spare)
8370 		return ret;
8371 
8372 	/* Do we have previous read data to read? */
8373 	if (info->read < page_size)
8374 		goto read;
8375 
8376  again:
8377 	trace_access_lock(iter->cpu_file);
8378 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8379 				    info->spare,
8380 				    count,
8381 				    iter->cpu_file, 0);
8382 	trace_access_unlock(iter->cpu_file);
8383 
8384 	if (ret < 0) {
8385 		if (trace_empty(iter)) {
8386 			if ((filp->f_flags & O_NONBLOCK))
8387 				return -EAGAIN;
8388 
8389 			ret = wait_on_pipe(iter, 0);
8390 			if (ret)
8391 				return ret;
8392 
8393 			goto again;
8394 		}
8395 		return 0;
8396 	}
8397 
8398 	info->read = 0;
8399  read:
8400 	size = page_size - info->read;
8401 	if (size > count)
8402 		size = count;
8403 	trace_data = ring_buffer_read_page_data(info->spare);
8404 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8405 	if (ret == size)
8406 		return -EFAULT;
8407 
8408 	size -= ret;
8409 
8410 	*ppos += size;
8411 	info->read += size;
8412 
8413 	return size;
8414 }
8415 
8416 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8417 {
8418 	struct ftrace_buffer_info *info = file->private_data;
8419 	struct trace_iterator *iter = &info->iter;
8420 
8421 	iter->closed = true;
8422 	/* Make sure the waiters see the new wait_index */
8423 	(void)atomic_fetch_inc_release(&iter->wait_index);
8424 
8425 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8426 
8427 	return 0;
8428 }
8429 
8430 static int tracing_buffers_release(struct inode *inode, struct file *file)
8431 {
8432 	struct ftrace_buffer_info *info = file->private_data;
8433 	struct trace_iterator *iter = &info->iter;
8434 
8435 	mutex_lock(&trace_types_lock);
8436 
8437 	iter->tr->trace_ref--;
8438 
8439 	__trace_array_put(iter->tr);
8440 
8441 	if (info->spare)
8442 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8443 					   info->spare_cpu, info->spare);
8444 	kvfree(info);
8445 
8446 	mutex_unlock(&trace_types_lock);
8447 
8448 	return 0;
8449 }
8450 
8451 struct buffer_ref {
8452 	struct trace_buffer	*buffer;
8453 	void			*page;
8454 	int			cpu;
8455 	refcount_t		refcount;
8456 };
8457 
8458 static void buffer_ref_release(struct buffer_ref *ref)
8459 {
8460 	if (!refcount_dec_and_test(&ref->refcount))
8461 		return;
8462 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8463 	kfree(ref);
8464 }
8465 
8466 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8467 				    struct pipe_buffer *buf)
8468 {
8469 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8470 
8471 	buffer_ref_release(ref);
8472 	buf->private = 0;
8473 }
8474 
8475 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8476 				struct pipe_buffer *buf)
8477 {
8478 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8479 
8480 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8481 		return false;
8482 
8483 	refcount_inc(&ref->refcount);
8484 	return true;
8485 }
8486 
8487 /* Pipe buffer operations for a buffer. */
8488 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8489 	.release		= buffer_pipe_buf_release,
8490 	.get			= buffer_pipe_buf_get,
8491 };
8492 
8493 /*
8494  * Callback from splice_to_pipe(), if we need to release some pages
8495  * at the end of the spd in case we errored out while filling the pipe.
8496  */
8497 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8498 {
8499 	struct buffer_ref *ref =
8500 		(struct buffer_ref *)spd->partial[i].private;
8501 
8502 	buffer_ref_release(ref);
8503 	spd->partial[i].private = 0;
8504 }
8505 
8506 static ssize_t
8507 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8508 			    struct pipe_inode_info *pipe, size_t len,
8509 			    unsigned int flags)
8510 {
8511 	struct ftrace_buffer_info *info = file->private_data;
8512 	struct trace_iterator *iter = &info->iter;
8513 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8514 	struct page *pages_def[PIPE_DEF_BUFFERS];
8515 	struct splice_pipe_desc spd = {
8516 		.pages		= pages_def,
8517 		.partial	= partial_def,
8518 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8519 		.ops		= &buffer_pipe_buf_ops,
8520 		.spd_release	= buffer_spd_release,
8521 	};
8522 	struct buffer_ref *ref;
8523 	bool woken = false;
8524 	int page_size;
8525 	int entries, i;
8526 	ssize_t ret = 0;
8527 
8528 #ifdef CONFIG_TRACER_MAX_TRACE
8529 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8530 		return -EBUSY;
8531 #endif
8532 
8533 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8534 	if (*ppos & (page_size - 1))
8535 		return -EINVAL;
8536 
8537 	if (len & (page_size - 1)) {
8538 		if (len < page_size)
8539 			return -EINVAL;
8540 		len &= (~(page_size - 1));
8541 	}
8542 
8543 	if (splice_grow_spd(pipe, &spd))
8544 		return -ENOMEM;
8545 
8546  again:
8547 	trace_access_lock(iter->cpu_file);
8548 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8549 
8550 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8551 		struct page *page;
8552 		int r;
8553 
8554 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8555 		if (!ref) {
8556 			ret = -ENOMEM;
8557 			break;
8558 		}
8559 
8560 		refcount_set(&ref->refcount, 1);
8561 		ref->buffer = iter->array_buffer->buffer;
8562 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8563 		if (IS_ERR(ref->page)) {
8564 			ret = PTR_ERR(ref->page);
8565 			ref->page = NULL;
8566 			kfree(ref);
8567 			break;
8568 		}
8569 		ref->cpu = iter->cpu_file;
8570 
8571 		r = ring_buffer_read_page(ref->buffer, ref->page,
8572 					  len, iter->cpu_file, 1);
8573 		if (r < 0) {
8574 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8575 						   ref->page);
8576 			kfree(ref);
8577 			break;
8578 		}
8579 
8580 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8581 
8582 		spd.pages[i] = page;
8583 		spd.partial[i].len = page_size;
8584 		spd.partial[i].offset = 0;
8585 		spd.partial[i].private = (unsigned long)ref;
8586 		spd.nr_pages++;
8587 		*ppos += page_size;
8588 
8589 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8590 	}
8591 
8592 	trace_access_unlock(iter->cpu_file);
8593 	spd.nr_pages = i;
8594 
8595 	/* did we read anything? */
8596 	if (!spd.nr_pages) {
8597 
8598 		if (ret)
8599 			goto out;
8600 
8601 		if (woken)
8602 			goto out;
8603 
8604 		ret = -EAGAIN;
8605 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8606 			goto out;
8607 
8608 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8609 		if (ret)
8610 			goto out;
8611 
8612 		/* No need to wait after waking up when tracing is off */
8613 		if (!tracer_tracing_is_on(iter->tr))
8614 			goto out;
8615 
8616 		/* Iterate one more time to collect any new data, then exit */
8617 		woken = true;
8618 
8619 		goto again;
8620 	}
8621 
8622 	ret = splice_to_pipe(pipe, &spd);
8623 out:
8624 	splice_shrink_spd(&spd);
8625 
8626 	return ret;
8627 }
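/*
 * Example (illustrative sketch, not part of the kernel build): user space can
 * consume binary sub-buffers without an intermediate copy by splicing
 * trace_pipe_raw into a pipe (out_fd must be the write end of a pipe).  As
 * checked above, the length must be a multiple of the sub-buffer size; the
 * 4096 below assumes 4K pages and the default sub-buffer order.
 *
 *	#define _GNU_SOURCE
 *	#include <fcntl.h>
 *	#include <unistd.h>
 *
 *	static void splice_one_subbuf(int out_fd)
 *	{
 *		int fd = open("/sys/kernel/tracing/per_cpu/cpu0/trace_pipe_raw",
 *			      O_RDONLY);
 *
 *		if (fd < 0)
 *			return;
 *		splice(fd, NULL, out_fd, NULL, 4096, 0);
 *		close(fd);
 *	}
 */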
8628 
8629 /* An ioctl call with cmd 0 to the ring buffer file will wake up all waiters */
8630 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8631 {
8632 	struct ftrace_buffer_info *info = file->private_data;
8633 	struct trace_iterator *iter = &info->iter;
8634 
8635 	if (cmd)
8636 		return -ENOIOCTLCMD;
8637 
8638 	mutex_lock(&trace_types_lock);
8639 
8640 	/* Make sure the waiters see the new wait_index */
8641 	(void)atomic_fetch_inc_release(&iter->wait_index);
8642 
8643 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8644 
8645 	mutex_unlock(&trace_types_lock);
8646 	return 0;
8647 }
8648 
8649 static const struct file_operations tracing_buffers_fops = {
8650 	.open		= tracing_buffers_open,
8651 	.read		= tracing_buffers_read,
8652 	.poll		= tracing_buffers_poll,
8653 	.release	= tracing_buffers_release,
8654 	.flush		= tracing_buffers_flush,
8655 	.splice_read	= tracing_buffers_splice_read,
8656 	.unlocked_ioctl = tracing_buffers_ioctl,
8657 	.llseek		= no_llseek,
8658 };
8659 
8660 static ssize_t
8661 tracing_stats_read(struct file *filp, char __user *ubuf,
8662 		   size_t count, loff_t *ppos)
8663 {
8664 	struct inode *inode = file_inode(filp);
8665 	struct trace_array *tr = inode->i_private;
8666 	struct array_buffer *trace_buf = &tr->array_buffer;
8667 	int cpu = tracing_get_cpu(inode);
8668 	struct trace_seq *s;
8669 	unsigned long cnt;
8670 	unsigned long long t;
8671 	unsigned long usec_rem;
8672 
8673 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8674 	if (!s)
8675 		return -ENOMEM;
8676 
8677 	trace_seq_init(s);
8678 
8679 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8680 	trace_seq_printf(s, "entries: %ld\n", cnt);
8681 
8682 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8683 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8684 
8685 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8686 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8687 
8688 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8689 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8690 
8691 	if (trace_clocks[tr->clock_id].in_ns) {
8692 		/* local or global for trace_clock */
8693 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8694 		usec_rem = do_div(t, USEC_PER_SEC);
8695 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8696 								t, usec_rem);
8697 
8698 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8699 		usec_rem = do_div(t, USEC_PER_SEC);
8700 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8701 	} else {
8702 		/* counter or tsc mode for trace_clock */
8703 		trace_seq_printf(s, "oldest event ts: %llu\n",
8704 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8705 
8706 		trace_seq_printf(s, "now ts: %llu\n",
8707 				ring_buffer_time_stamp(trace_buf->buffer));
8708 	}
8709 
8710 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8711 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8712 
8713 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8714 	trace_seq_printf(s, "read events: %ld\n", cnt);
8715 
8716 	count = simple_read_from_buffer(ubuf, count, ppos,
8717 					s->buffer, trace_seq_used(s));
8718 
8719 	kfree(s);
8720 
8721 	return count;
8722 }
8723 
8724 static const struct file_operations tracing_stats_fops = {
8725 	.open		= tracing_open_generic_tr,
8726 	.read		= tracing_stats_read,
8727 	.llseek		= generic_file_llseek,
8728 	.release	= tracing_release_generic_tr,
8729 };
8730 
8731 #ifdef CONFIG_DYNAMIC_FTRACE
8732 
8733 static ssize_t
8734 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8735 		  size_t cnt, loff_t *ppos)
8736 {
8737 	ssize_t ret;
8738 	char *buf;
8739 	int r;
8740 
8741 	/* 256 should be plenty to hold the amount needed */
8742 	buf = kmalloc(256, GFP_KERNEL);
8743 	if (!buf)
8744 		return -ENOMEM;
8745 
8746 	r = scnprintf(buf, 256, "%ld pages:%ld groups: %ld\n",
8747 		      ftrace_update_tot_cnt,
8748 		      ftrace_number_of_pages,
8749 		      ftrace_number_of_groups);
8750 
8751 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8752 	kfree(buf);
8753 	return ret;
8754 }
8755 
8756 static const struct file_operations tracing_dyn_info_fops = {
8757 	.open		= tracing_open_generic,
8758 	.read		= tracing_read_dyn_info,
8759 	.llseek		= generic_file_llseek,
8760 };
8761 #endif /* CONFIG_DYNAMIC_FTRACE */
8762 
8763 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8764 static void
8765 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8766 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8767 		void *data)
8768 {
8769 	tracing_snapshot_instance(tr);
8770 }
8771 
8772 static void
8773 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8774 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8775 		      void *data)
8776 {
8777 	struct ftrace_func_mapper *mapper = data;
8778 	long *count = NULL;
8779 
8780 	if (mapper)
8781 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8782 
8783 	if (count) {
8784 
8785 		if (*count <= 0)
8786 			return;
8787 
8788 		(*count)--;
8789 	}
8790 
8791 	tracing_snapshot_instance(tr);
8792 }
8793 
8794 static int
8795 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8796 		      struct ftrace_probe_ops *ops, void *data)
8797 {
8798 	struct ftrace_func_mapper *mapper = data;
8799 	long *count = NULL;
8800 
8801 	seq_printf(m, "%ps:", (void *)ip);
8802 
8803 	seq_puts(m, "snapshot");
8804 
8805 	if (mapper)
8806 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8807 
8808 	if (count)
8809 		seq_printf(m, ":count=%ld\n", *count);
8810 	else
8811 		seq_puts(m, ":unlimited\n");
8812 
8813 	return 0;
8814 }
8815 
8816 static int
8817 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8818 		     unsigned long ip, void *init_data, void **data)
8819 {
8820 	struct ftrace_func_mapper *mapper = *data;
8821 
8822 	if (!mapper) {
8823 		mapper = allocate_ftrace_func_mapper();
8824 		if (!mapper)
8825 			return -ENOMEM;
8826 		*data = mapper;
8827 	}
8828 
8829 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8830 }
8831 
8832 static void
8833 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8834 		     unsigned long ip, void *data)
8835 {
8836 	struct ftrace_func_mapper *mapper = data;
8837 
8838 	if (!ip) {
8839 		if (!mapper)
8840 			return;
8841 		free_ftrace_func_mapper(mapper, NULL);
8842 		return;
8843 	}
8844 
8845 	ftrace_func_mapper_remove_ip(mapper, ip);
8846 }
8847 
8848 static struct ftrace_probe_ops snapshot_probe_ops = {
8849 	.func			= ftrace_snapshot,
8850 	.print			= ftrace_snapshot_print,
8851 };
8852 
8853 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8854 	.func			= ftrace_count_snapshot,
8855 	.print			= ftrace_snapshot_print,
8856 	.init			= ftrace_snapshot_init,
8857 	.free			= ftrace_snapshot_free,
8858 };
8859 
8860 static int
8861 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8862 			       char *glob, char *cmd, char *param, int enable)
8863 {
8864 	struct ftrace_probe_ops *ops;
8865 	void *count = (void *)-1;
8866 	char *number;
8867 	int ret;
8868 
8869 	if (!tr)
8870 		return -ENODEV;
8871 
8872 	/* hash funcs only work with set_ftrace_filter */
8873 	if (!enable)
8874 		return -EINVAL;
8875 
8876 	ops = param ? &snapshot_count_probe_ops : &snapshot_probe_ops;
8877 
8878 	if (glob[0] == '!')
8879 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
8880 
8881 	if (!param)
8882 		goto out_reg;
8883 
8884 	number = strsep(&param, ":");
8885 
8886 	if (!strlen(number))
8887 		goto out_reg;
8888 
8889 	/*
8890 	 * We use the callback data field (which is a pointer)
8891 	 * as our counter.
8892 	 */
8893 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8894 	if (ret)
8895 		return ret;
8896 
8897  out_reg:
8898 	ret = tracing_alloc_snapshot_instance(tr);
8899 	if (ret < 0)
8900 		goto out;
8901 
8902 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8903 
8904  out:
8905 	return ret < 0 ? ret : 0;
8906 }
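/*
 * The callback above implements the "snapshot" command accepted through
 * set_ftrace_filter.  Based on the parsing above, the written string has the
 * form "<function-glob>:snapshot[:<count>]", where the optional count limits
 * how many snapshots are taken, and a leading '!' removes the probe again.
 */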
8907 
8908 static struct ftrace_func_command ftrace_snapshot_cmd = {
8909 	.name			= "snapshot",
8910 	.func			= ftrace_trace_snapshot_callback,
8911 };
8912 
8913 static __init int register_snapshot_cmd(void)
8914 {
8915 	return register_ftrace_command(&ftrace_snapshot_cmd);
8916 }
8917 #else
8918 static inline __init int register_snapshot_cmd(void) { return 0; }
8919 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8920 
8921 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8922 {
8923 	if (WARN_ON(!tr->dir))
8924 		return ERR_PTR(-ENODEV);
8925 
8926 	/* Top directory uses NULL as the parent */
8927 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8928 		return NULL;
8929 
8930 	/* All sub buffers have a descriptor */
8931 	return tr->dir;
8932 }
8933 
8934 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8935 {
8936 	struct dentry *d_tracer;
8937 
8938 	if (tr->percpu_dir)
8939 		return tr->percpu_dir;
8940 
8941 	d_tracer = tracing_get_dentry(tr);
8942 	if (IS_ERR(d_tracer))
8943 		return NULL;
8944 
8945 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8946 
8947 	MEM_FAIL(!tr->percpu_dir,
8948 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8949 
8950 	return tr->percpu_dir;
8951 }
8952 
8953 static struct dentry *
8954 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8955 		      void *data, long cpu, const struct file_operations *fops)
8956 {
8957 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8958 
8959 	if (ret) /* See tracing_get_cpu() */
8960 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8961 	return ret;
8962 }
8963 
8964 static void
8965 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8966 {
8967 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8968 	struct dentry *d_cpu;
8969 	char cpu_dir[30]; /* 30 characters should be more than enough */
8970 
8971 	if (!d_percpu)
8972 		return;
8973 
8974 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8975 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8976 	if (!d_cpu) {
8977 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8978 		return;
8979 	}
8980 
8981 	/* per cpu trace_pipe */
8982 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8983 				tr, cpu, &tracing_pipe_fops);
8984 
8985 	/* per cpu trace */
8986 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8987 				tr, cpu, &tracing_fops);
8988 
8989 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8990 				tr, cpu, &tracing_buffers_fops);
8991 
8992 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8993 				tr, cpu, &tracing_stats_fops);
8994 
8995 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8996 				tr, cpu, &tracing_entries_fops);
8997 
8998 #ifdef CONFIG_TRACER_SNAPSHOT
8999 	trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
9000 				tr, cpu, &snapshot_fops);
9001 
9002 	trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
9003 				tr, cpu, &snapshot_raw_fops);
9004 #endif
9005 }
9006 
9007 #ifdef CONFIG_FTRACE_SELFTEST
9008 /* Let selftest have access to static functions in this file */
9009 #include "trace_selftest.c"
9010 #endif
9011 
9012 static ssize_t
9013 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
9014 			loff_t *ppos)
9015 {
9016 	struct trace_option_dentry *topt = filp->private_data;
9017 	char *buf;
9018 
9019 	if (topt->flags->val & topt->opt->bit)
9020 		buf = "1\n";
9021 	else
9022 		buf = "0\n";
9023 
9024 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9025 }
9026 
9027 static ssize_t
9028 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
9029 			 loff_t *ppos)
9030 {
9031 	struct trace_option_dentry *topt = filp->private_data;
9032 	unsigned long val;
9033 	int ret;
9034 
9035 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9036 	if (ret)
9037 		return ret;
9038 
9039 	if (val != 0 && val != 1)
9040 		return -EINVAL;
9041 
9042 	if (!!(topt->flags->val & topt->opt->bit) != val) {
9043 		mutex_lock(&trace_types_lock);
9044 		ret = __set_tracer_option(topt->tr, topt->flags,
9045 					  topt->opt, !val);
9046 		mutex_unlock(&trace_types_lock);
9047 		if (ret)
9048 			return ret;
9049 	}
9050 
9051 	*ppos += cnt;
9052 
9053 	return cnt;
9054 }
9055 
9056 static int tracing_open_options(struct inode *inode, struct file *filp)
9057 {
9058 	struct trace_option_dentry *topt = inode->i_private;
9059 	int ret;
9060 
9061 	ret = tracing_check_open_get_tr(topt->tr);
9062 	if (ret)
9063 		return ret;
9064 
9065 	filp->private_data = inode->i_private;
9066 	return 0;
9067 }
9068 
9069 static int tracing_release_options(struct inode *inode, struct file *file)
9070 {
9071 	struct trace_option_dentry *topt = file->private_data;
9072 
9073 	trace_array_put(topt->tr);
9074 	return 0;
9075 }
9076 
9077 static const struct file_operations trace_options_fops = {
9078 	.open = tracing_open_options,
9079 	.read = trace_options_read,
9080 	.write = trace_options_write,
9081 	.llseek	= generic_file_llseek,
9082 	.release = tracing_release_options,
9083 };
9084 
9085 /*
9086  * In order to pass in both the trace_array descriptor as well as the index
9087  * to the flag that the trace option file represents, the trace_array
9088  * has a character array of trace_flags_index[], which holds the index
9089  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
9090  * The address of this character array is passed to the flag option file
9091  * read/write callbacks.
9092  *
9093  * In order to extract both the index and the trace_array descriptor,
9094  * get_tr_index() uses the following algorithm.
9095  *
9096  *   idx = *ptr;
9097  *
9098  * As the array entry the pointer refers to holds its own index value
9099  * (remember index[1] == 1).
9100  *
9101  * Then to get the trace_array descriptor, by subtracting that index
9102  * from the ptr, we get to the start of the index itself.
9103  *
9104  *   ptr - idx == &index[0]
9105  *
9106  * Then a simple container_of() from that pointer gets us to the
9107  * trace_array descriptor.
9108  */
9109 static void get_tr_index(void *data, struct trace_array **ptr,
9110 			 unsigned int *pindex)
9111 {
9112 	*pindex = *(unsigned char *)data;
9113 
9114 	*ptr = container_of(data - *pindex, struct trace_array,
9115 			    trace_flags_index);
9116 }
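/*
 * For example (illustrative): if data points at &tr->trace_flags_index[3],
 * then *pindex is set to 3 and data - 3 is &tr->trace_flags_index[0], from
 * which container_of() recovers the trace_array.
 */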
9117 
9118 static ssize_t
9119 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
9120 			loff_t *ppos)
9121 {
9122 	void *tr_index = filp->private_data;
9123 	struct trace_array *tr;
9124 	unsigned int index;
9125 	char *buf;
9126 
9127 	get_tr_index(tr_index, &tr, &index);
9128 
9129 	if (tr->trace_flags & (1 << index))
9130 		buf = "1\n";
9131 	else
9132 		buf = "0\n";
9133 
9134 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
9135 }
9136 
9137 static ssize_t
9138 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
9139 			 loff_t *ppos)
9140 {
9141 	void *tr_index = filp->private_data;
9142 	struct trace_array *tr;
9143 	unsigned int index;
9144 	unsigned long val;
9145 	int ret;
9146 
9147 	get_tr_index(tr_index, &tr, &index);
9148 
9149 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9150 	if (ret)
9151 		return ret;
9152 
9153 	if (val != 0 && val != 1)
9154 		return -EINVAL;
9155 
9156 	mutex_lock(&event_mutex);
9157 	mutex_lock(&trace_types_lock);
9158 	ret = set_tracer_flag(tr, 1 << index, val);
9159 	mutex_unlock(&trace_types_lock);
9160 	mutex_unlock(&event_mutex);
9161 
9162 	if (ret < 0)
9163 		return ret;
9164 
9165 	*ppos += cnt;
9166 
9167 	return cnt;
9168 }
9169 
9170 static const struct file_operations trace_options_core_fops = {
9171 	.open = tracing_open_generic,
9172 	.read = trace_options_core_read,
9173 	.write = trace_options_core_write,
9174 	.llseek = generic_file_llseek,
9175 };
9176 
9177 struct dentry *trace_create_file(const char *name,
9178 				 umode_t mode,
9179 				 struct dentry *parent,
9180 				 void *data,
9181 				 const struct file_operations *fops)
9182 {
9183 	struct dentry *ret;
9184 
9185 	ret = tracefs_create_file(name, mode, parent, data, fops);
9186 	if (!ret)
9187 		pr_warn("Could not create tracefs '%s' entry\n", name);
9188 
9189 	return ret;
9190 }
9191 
9192 
9193 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9194 {
9195 	struct dentry *d_tracer;
9196 
9197 	if (tr->options)
9198 		return tr->options;
9199 
9200 	d_tracer = tracing_get_dentry(tr);
9201 	if (IS_ERR(d_tracer))
9202 		return NULL;
9203 
9204 	tr->options = tracefs_create_dir("options", d_tracer);
9205 	if (!tr->options) {
9206 		pr_warn("Could not create tracefs directory 'options'\n");
9207 		return NULL;
9208 	}
9209 
9210 	return tr->options;
9211 }
9212 
9213 static void
9214 create_trace_option_file(struct trace_array *tr,
9215 			 struct trace_option_dentry *topt,
9216 			 struct tracer_flags *flags,
9217 			 struct tracer_opt *opt)
9218 {
9219 	struct dentry *t_options;
9220 
9221 	t_options = trace_options_init_dentry(tr);
9222 	if (!t_options)
9223 		return;
9224 
9225 	topt->flags = flags;
9226 	topt->opt = opt;
9227 	topt->tr = tr;
9228 
9229 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9230 					t_options, topt, &trace_options_fops);
9231 
9232 }
9233 
9234 static void
9235 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9236 {
9237 	struct trace_option_dentry *topts;
9238 	struct trace_options *tr_topts;
9239 	struct tracer_flags *flags;
9240 	struct tracer_opt *opts;
9241 	int cnt;
9242 	int i;
9243 
9244 	if (!tracer)
9245 		return;
9246 
9247 	flags = tracer->flags;
9248 
9249 	if (!flags || !flags->opts)
9250 		return;
9251 
9252 	/*
9253 	 * If this is an instance, only create flags for tracers
9254 	 * the instance may have.
9255 	 */
9256 	if (!trace_ok_for_array(tracer, tr))
9257 		return;
9258 
9259 	for (i = 0; i < tr->nr_topts; i++) {
9260 		/* Make sure there are no duplicate flags. */
9261 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9262 			return;
9263 	}
9264 
9265 	opts = flags->opts;
9266 
9267 	for (cnt = 0; opts[cnt].name; cnt++)
9268 		;
9269 
9270 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9271 	if (!topts)
9272 		return;
9273 
9274 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9275 			    GFP_KERNEL);
9276 	if (!tr_topts) {
9277 		kfree(topts);
9278 		return;
9279 	}
9280 
9281 	tr->topts = tr_topts;
9282 	tr->topts[tr->nr_topts].tracer = tracer;
9283 	tr->topts[tr->nr_topts].topts = topts;
9284 	tr->nr_topts++;
9285 
9286 	for (cnt = 0; opts[cnt].name; cnt++) {
9287 		create_trace_option_file(tr, &topts[cnt], flags,
9288 					 &opts[cnt]);
9289 		MEM_FAIL(topts[cnt].entry == NULL,
9290 			  "Failed to create trace option: %s",
9291 			  opts[cnt].name);
9292 	}
9293 }
9294 
9295 static struct dentry *
9296 create_trace_option_core_file(struct trace_array *tr,
9297 			      const char *option, long index)
9298 {
9299 	struct dentry *t_options;
9300 
9301 	t_options = trace_options_init_dentry(tr);
9302 	if (!t_options)
9303 		return NULL;
9304 
9305 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9306 				 (void *)&tr->trace_flags_index[index],
9307 				 &trace_options_core_fops);
9308 }
9309 
9310 static void create_trace_options_dir(struct trace_array *tr)
9311 {
9312 	struct dentry *t_options;
9313 	bool top_level = tr == &global_trace;
9314 	int i;
9315 
9316 	t_options = trace_options_init_dentry(tr);
9317 	if (!t_options)
9318 		return;
9319 
9320 	for (i = 0; trace_options[i]; i++) {
9321 		if (top_level ||
9322 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9323 			create_trace_option_core_file(tr, trace_options[i], i);
9324 	}
9325 }
9326 
9327 static ssize_t
9328 rb_simple_read(struct file *filp, char __user *ubuf,
9329 	       size_t cnt, loff_t *ppos)
9330 {
9331 	struct trace_array *tr = filp->private_data;
9332 	char buf[64];
9333 	int r;
9334 
9335 	r = tracer_tracing_is_on(tr);
9336 	r = sprintf(buf, "%d\n", r);
9337 
9338 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9339 }
9340 
9341 static ssize_t
9342 rb_simple_write(struct file *filp, const char __user *ubuf,
9343 		size_t cnt, loff_t *ppos)
9344 {
9345 	struct trace_array *tr = filp->private_data;
9346 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9347 	unsigned long val;
9348 	int ret;
9349 
9350 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9351 	if (ret)
9352 		return ret;
9353 
9354 	if (buffer) {
9355 		mutex_lock(&trace_types_lock);
9356 		if (!!val == tracer_tracing_is_on(tr)) {
9357 			val = 0; /* do nothing */
9358 		} else if (val) {
9359 			tracer_tracing_on(tr);
9360 			if (tr->current_trace->start)
9361 				tr->current_trace->start(tr);
9362 		} else {
9363 			tracer_tracing_off(tr);
9364 			if (tr->current_trace->stop)
9365 				tr->current_trace->stop(tr);
9366 			/* Wake up any waiters */
9367 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9368 		}
9369 		mutex_unlock(&trace_types_lock);
9370 	}
9371 
9372 	(*ppos)++;
9373 
9374 	return cnt;
9375 }
9376 
9377 static const struct file_operations rb_simple_fops = {
9378 	.open		= tracing_open_generic_tr,
9379 	.read		= rb_simple_read,
9380 	.write		= rb_simple_write,
9381 	.release	= tracing_release_generic_tr,
9382 	.llseek		= default_llseek,
9383 };
9384 
9385 static ssize_t
9386 buffer_percent_read(struct file *filp, char __user *ubuf,
9387 		    size_t cnt, loff_t *ppos)
9388 {
9389 	struct trace_array *tr = filp->private_data;
9390 	char buf[64];
9391 	int r;
9392 
9393 	r = tr->buffer_percent;
9394 	r = sprintf(buf, "%d\n", r);
9395 
9396 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9397 }
9398 
9399 static ssize_t
9400 buffer_percent_write(struct file *filp, const char __user *ubuf,
9401 		     size_t cnt, loff_t *ppos)
9402 {
9403 	struct trace_array *tr = filp->private_data;
9404 	unsigned long val;
9405 	int ret;
9406 
9407 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9408 	if (ret)
9409 		return ret;
9410 
9411 	if (val > 100)
9412 		return -EINVAL;
9413 
9414 	tr->buffer_percent = val;
9415 
9416 	(*ppos)++;
9417 
9418 	return cnt;
9419 }
9420 
9421 static const struct file_operations buffer_percent_fops = {
9422 	.open		= tracing_open_generic_tr,
9423 	.read		= buffer_percent_read,
9424 	.write		= buffer_percent_write,
9425 	.release	= tracing_release_generic_tr,
9426 	.llseek		= default_llseek,
9427 };
9428 
9429 static ssize_t
9430 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9431 {
9432 	struct trace_array *tr = filp->private_data;
9433 	size_t size;
9434 	char buf[64];
9435 	int order;
9436 	int r;
9437 
9438 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9439 	size = (PAGE_SIZE << order) / 1024;
9440 
9441 	r = sprintf(buf, "%zd\n", size);
9442 
9443 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9444 }
9445 
9446 static ssize_t
9447 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9448 			 size_t cnt, loff_t *ppos)
9449 {
9450 	struct trace_array *tr = filp->private_data;
9451 	unsigned long val;
9452 	int old_order;
9453 	int order;
9454 	int pages;
9455 	int ret;
9456 
9457 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9458 	if (ret)
9459 		return ret;
9460 
9461 	val *= 1024; /* value passed in is in KB */
9462 
9463 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9464 	order = fls(pages - 1);
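	/*
	 * Worked example (assuming 4K pages): writing 8 means 8 KB, which is
	 * DIV_ROUND_UP(8192, 4096) = 2 pages, and fls(2 - 1) = 1, so the
	 * requested sub-buffer order is 1 (2^1 pages per sub-buffer).
	 */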
9465 
9466 	/* limit between 1 and 128 system pages */
9467 	if (order < 0 || order > 7)
9468 		return -EINVAL;
9469 
9470 	/* Do not allow tracing while changing the order of the ring buffer */
9471 	tracing_stop_tr(tr);
9472 
9473 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9474 	if (old_order == order)
9475 		goto out;
9476 
9477 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9478 	if (ret)
9479 		goto out;
9480 
9481 #ifdef CONFIG_TRACER_MAX_TRACE
9482 
9483 	if (!tr->allocated_snapshot)
9484 		goto out_max;
9485 
9486 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9487 	if (ret) {
9488 		/* Put back the old order */
9489 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9490 		if (WARN_ON_ONCE(cnt)) {
9491 			/*
9492 			 * AARGH! We are left with different orders!
9493 			 * The max buffer is our "snapshot" buffer.
9494 			 * When a tracer needs a snapshot (one of the
9495 			 * latency tracers), it swaps the max buffer
9496 			 * with the saved snapshot. We succeeded in updating
9497 			 * the order of the main buffer, but failed to update
9498 			 * the order of the max buffer. But when we tried
9499 			 * to reset the main buffer to the original size, we
9500 			 * failed there too. This is very unlikely to
9501 			 * happen, but if it does, warn and kill all
9502 			 * tracing.
9503 			 */
9504 			tracing_disabled = 1;
9505 		}
9506 		goto out;
9507 	}
9508  out_max:
9509 #endif
9510 	(*ppos)++;
9511  out:
9512 	if (ret)
9513 		cnt = ret;
9514 	tracing_start_tr(tr);
9515 	return cnt;
9516 }
9517 
9518 static const struct file_operations buffer_subbuf_size_fops = {
9519 	.open		= tracing_open_generic_tr,
9520 	.read		= buffer_subbuf_size_read,
9521 	.write		= buffer_subbuf_size_write,
9522 	.release	= tracing_release_generic_tr,
9523 	.llseek		= default_llseek,
9524 };
9525 
9526 static struct dentry *trace_instance_dir;
9527 
9528 static void
9529 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9530 
9531 static int
9532 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9533 {
9534 	enum ring_buffer_flags rb_flags;
9535 
9536 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9537 
9538 	buf->tr = tr;
9539 
9540 	buf->buffer = ring_buffer_alloc(size, rb_flags);
9541 	if (!buf->buffer)
9542 		return -ENOMEM;
9543 
9544 	buf->data = alloc_percpu(struct trace_array_cpu);
9545 	if (!buf->data) {
9546 		ring_buffer_free(buf->buffer);
9547 		buf->buffer = NULL;
9548 		return -ENOMEM;
9549 	}
9550 
9551 	/* Initialize the per-cpu entry counts from the allocated buffer size */
9552 	set_buffer_entries(&tr->array_buffer,
9553 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9554 
9555 	return 0;
9556 }
9557 
9558 static void free_trace_buffer(struct array_buffer *buf)
9559 {
9560 	if (buf->buffer) {
9561 		ring_buffer_free(buf->buffer);
9562 		buf->buffer = NULL;
9563 		free_percpu(buf->data);
9564 		buf->data = NULL;
9565 	}
9566 }
9567 
9568 static int allocate_trace_buffers(struct trace_array *tr, int size)
9569 {
9570 	int ret;
9571 
9572 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9573 	if (ret)
9574 		return ret;
9575 
9576 #ifdef CONFIG_TRACER_MAX_TRACE
9577 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9578 				    allocate_snapshot ? size : 1);
9579 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9580 		free_trace_buffer(&tr->array_buffer);
9581 		return -ENOMEM;
9582 	}
9583 	tr->allocated_snapshot = allocate_snapshot;
9584 
9585 	allocate_snapshot = false;
9586 #endif
9587 
9588 	return 0;
9589 }
9590 
9591 static void free_trace_buffers(struct trace_array *tr)
9592 {
9593 	if (!tr)
9594 		return;
9595 
9596 	free_trace_buffer(&tr->array_buffer);
9597 
9598 #ifdef CONFIG_TRACER_MAX_TRACE
9599 	free_trace_buffer(&tr->max_buffer);
9600 #endif
9601 }
9602 
9603 static void init_trace_flags_index(struct trace_array *tr)
9604 {
9605 	int i;
9606 
9607 	/* Used by the trace options files */
9608 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9609 		tr->trace_flags_index[i] = i;
9610 }
9611 
9612 static void __update_tracer_options(struct trace_array *tr)
9613 {
9614 	struct tracer *t;
9615 
9616 	for (t = trace_types; t; t = t->next)
9617 		add_tracer_options(tr, t);
9618 }
9619 
9620 static void update_tracer_options(struct trace_array *tr)
9621 {
9622 	mutex_lock(&trace_types_lock);
9623 	tracer_options_updated = true;
9624 	__update_tracer_options(tr);
9625 	mutex_unlock(&trace_types_lock);
9626 }
9627 
9628 /* Must have trace_types_lock held */
9629 struct trace_array *trace_array_find(const char *instance)
9630 {
9631 	struct trace_array *tr, *found = NULL;
9632 
9633 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9634 		if (tr->name && strcmp(tr->name, instance) == 0) {
9635 			found = tr;
9636 			break;
9637 		}
9638 	}
9639 
9640 	return found;
9641 }
9642 
9643 struct trace_array *trace_array_find_get(const char *instance)
9644 {
9645 	struct trace_array *tr;
9646 
9647 	mutex_lock(&trace_types_lock);
9648 	tr = trace_array_find(instance);
9649 	if (tr)
9650 		tr->ref++;
9651 	mutex_unlock(&trace_types_lock);
9652 
9653 	return tr;
9654 }
9655 
9656 static int trace_array_create_dir(struct trace_array *tr)
9657 {
9658 	int ret;
9659 
9660 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9661 	if (!tr->dir)
9662 		return -EINVAL;
9663 
9664 	ret = event_trace_add_tracer(tr->dir, tr);
9665 	if (ret) {
9666 		tracefs_remove(tr->dir);
9667 		return ret;
9668 	}
9669 
9670 	init_tracer_tracefs(tr, tr->dir);
9671 	__update_tracer_options(tr);
9672 
9673 	return ret;
9674 }
9675 
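/*
 * Allocate and set up a new trace instance. @systems optionally limits
 * which event systems get directories created (NULL means all of them).
 * Called with event_mutex and trace_types_lock held.
 */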
9676 static struct trace_array *
9677 trace_array_create_systems(const char *name, const char *systems)
9678 {
9679 	struct trace_array *tr;
9680 	int ret;
9681 
9682 	ret = -ENOMEM;
9683 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9684 	if (!tr)
9685 		return ERR_PTR(ret);
9686 
9687 	tr->name = kstrdup(name, GFP_KERNEL);
9688 	if (!tr->name)
9689 		goto out_free_tr;
9690 
9691 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9692 		goto out_free_tr;
9693 
9694 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9695 		goto out_free_tr;
9696 
9697 	if (systems) {
9698 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9699 		if (!tr->system_names)
9700 			goto out_free_tr;
9701 	}
9702 
9703 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9704 
9705 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9706 
9707 	raw_spin_lock_init(&tr->start_lock);
9708 
9709 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9710 
9711 	tr->current_trace = &nop_trace;
9712 
9713 	INIT_LIST_HEAD(&tr->systems);
9714 	INIT_LIST_HEAD(&tr->events);
9715 	INIT_LIST_HEAD(&tr->hist_vars);
9716 	INIT_LIST_HEAD(&tr->err_log);
9717 
9718 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9719 		goto out_free_tr;
9720 
9721 	/* The ring buffer is expanded by default */
9722 	trace_set_ring_buffer_expanded(tr);
9723 
9724 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9725 		goto out_free_tr;
9726 
9727 	ftrace_init_trace_array(tr);
9728 
9729 	init_trace_flags_index(tr);
9730 
9731 	if (trace_instance_dir) {
9732 		ret = trace_array_create_dir(tr);
9733 		if (ret)
9734 			goto out_free_tr;
9735 	} else
9736 		__trace_early_add_events(tr);
9737 
9738 	list_add(&tr->list, &ftrace_trace_arrays);
9739 
9740 	tr->ref++;
9741 
9742 	return tr;
9743 
9744  out_free_tr:
9745 	ftrace_free_ftrace_ops(tr);
9746 	free_trace_buffers(tr);
9747 	free_cpumask_var(tr->pipe_cpumask);
9748 	free_cpumask_var(tr->tracing_cpumask);
9749 	kfree_const(tr->system_names);
9750 	kfree(tr->name);
9751 	kfree(tr);
9752 
9753 	return ERR_PTR(ret);
9754 }
9755 
9756 static struct trace_array *trace_array_create(const char *name)
9757 {
9758 	return trace_array_create_systems(name, NULL);
9759 }
9760 
9761 static int instance_mkdir(const char *name)
9762 {
9763 	struct trace_array *tr;
9764 	int ret;
9765 
9766 	mutex_lock(&event_mutex);
9767 	mutex_lock(&trace_types_lock);
9768 
9769 	ret = -EEXIST;
9770 	if (trace_array_find(name))
9771 		goto out_unlock;
9772 
9773 	tr = trace_array_create(name);
9774 
9775 	ret = PTR_ERR_OR_ZERO(tr);
9776 
9777 out_unlock:
9778 	mutex_unlock(&trace_types_lock);
9779 	mutex_unlock(&event_mutex);
9780 	return ret;
9781 }
9782 
9783 /**
9784  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9785  * @name: The name of the trace array to be looked up/created.
9786  * @systems: A list of systems to create event directories for (NULL for all)
9787  *
9788  * Returns a pointer to the trace array with the given name, or NULL
9789  * if it cannot be created.
9790  *
9791  * NOTE: This function increments the reference counter associated with the
9792  * trace array returned. This makes sure it cannot be freed while in use.
9793  * Use trace_array_put() once the trace array is no longer needed.
9794  * If the trace_array is to be freed, trace_array_destroy() needs to
9795  * be called after the trace_array_put(), or simply let user space delete
9796  * it from the tracefs instances directory. But until the
9797  * trace_array_put() is called, user space cannot delete it.
9798  *
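 * A minimal sketch of in-kernel usage ("my_instance" is a made-up name,
 * error handling trimmed):
 *
 *	struct trace_array *tr;
 *
 *	tr = trace_array_get_by_name("my_instance", NULL);
 *	if (!tr)
 *		return -ENOMEM;
 *	... use the instance, e.g. trace_array_printk(tr, ...) ...
 *	trace_array_put(tr);
 *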
9799  */
9800 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9801 {
9802 	struct trace_array *tr;
9803 
9804 	mutex_lock(&event_mutex);
9805 	mutex_lock(&trace_types_lock);
9806 
9807 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9808 		if (tr->name && strcmp(tr->name, name) == 0)
9809 			goto out_unlock;
9810 	}
9811 
9812 	tr = trace_array_create_systems(name, systems);
9813 
9814 	if (IS_ERR(tr))
9815 		tr = NULL;
9816 out_unlock:
9817 	if (tr)
9818 		tr->ref++;
9819 
9820 	mutex_unlock(&trace_types_lock);
9821 	mutex_unlock(&event_mutex);
9822 	return tr;
9823 }
9824 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9825 
9826 static int __remove_instance(struct trace_array *tr)
9827 {
9828 	int i;
9829 
9830 	/* Reference counter for a newly created trace array = 1. */
9831 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9832 		return -EBUSY;
9833 
9834 	list_del(&tr->list);
9835 
9836 	/* Disable all the flags that were enabled coming in */
9837 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9838 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9839 			set_tracer_flag(tr, 1 << i, 0);
9840 	}
9841 
9842 	tracing_set_nop(tr);
9843 	clear_ftrace_function_probes(tr);
9844 	event_trace_del_tracer(tr);
9845 	ftrace_clear_pids(tr);
9846 	ftrace_destroy_function_files(tr);
9847 	tracefs_remove(tr->dir);
9848 	free_percpu(tr->last_func_repeats);
9849 	free_trace_buffers(tr);
9850 	clear_tracing_err_log(tr);
9851 
9852 	for (i = 0; i < tr->nr_topts; i++) {
9853 		kfree(tr->topts[i].topts);
9854 	}
9855 	kfree(tr->topts);
9856 
9857 	free_cpumask_var(tr->pipe_cpumask);
9858 	free_cpumask_var(tr->tracing_cpumask);
9859 	kfree_const(tr->system_names);
9860 	kfree(tr->name);
9861 	kfree(tr);
9862 
9863 	return 0;
9864 }
9865 
9866 int trace_array_destroy(struct trace_array *this_tr)
9867 {
9868 	struct trace_array *tr;
9869 	int ret;
9870 
9871 	if (!this_tr)
9872 		return -EINVAL;
9873 
9874 	mutex_lock(&event_mutex);
9875 	mutex_lock(&trace_types_lock);
9876 
9877 	ret = -ENODEV;
9878 
9879 	/* Make sure the trace array exists before destroying it. */
9880 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9881 		if (tr == this_tr) {
9882 			ret = __remove_instance(tr);
9883 			break;
9884 		}
9885 	}
9886 
9887 	mutex_unlock(&trace_types_lock);
9888 	mutex_unlock(&event_mutex);
9889 
9890 	return ret;
9891 }
9892 EXPORT_SYMBOL_GPL(trace_array_destroy);
9893 
9894 static int instance_rmdir(const char *name)
9895 {
9896 	struct trace_array *tr;
9897 	int ret;
9898 
9899 	mutex_lock(&event_mutex);
9900 	mutex_lock(&trace_types_lock);
9901 
9902 	ret = -ENODEV;
9903 	tr = trace_array_find(name);
9904 	if (tr)
9905 		ret = __remove_instance(tr);
9906 
9907 	mutex_unlock(&trace_types_lock);
9908 	mutex_unlock(&event_mutex);
9909 
9910 	return ret;
9911 }
9912 
9913 static __init void create_trace_instances(struct dentry *d_tracer)
9914 {
9915 	struct trace_array *tr;
9916 
9917 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9918 							 instance_mkdir,
9919 							 instance_rmdir);
9920 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9921 		return;
9922 
9923 	mutex_lock(&event_mutex);
9924 	mutex_lock(&trace_types_lock);
9925 
9926 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9927 		if (!tr->name)
9928 			continue;
9929 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9930 			     "Failed to create instance directory\n"))
9931 			break;
9932 	}
9933 
9934 	mutex_unlock(&trace_types_lock);
9935 	mutex_unlock(&event_mutex);
9936 }
9937 
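/*
 * Create the standard set of per-instance control files (trace, trace_pipe,
 * tracing_on, buffer_size_kb, ...) under @d_tracer. A NULL @d_tracer places
 * the files in the top level tracing directory.
 */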
9938 static void
9939 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9940 {
9941 	int cpu;
9942 
9943 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9944 			tr, &show_traces_fops);
9945 
9946 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9947 			tr, &set_tracer_fops);
9948 
9949 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9950 			  tr, &tracing_cpumask_fops);
9951 
9952 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9953 			  tr, &tracing_iter_fops);
9954 
9955 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9956 			  tr, &tracing_fops);
9957 
9958 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9959 			  tr, &tracing_pipe_fops);
9960 
9961 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9962 			  tr, &tracing_entries_fops);
9963 
9964 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9965 			  tr, &tracing_total_entries_fops);
9966 
9967 	trace_create_file("free_buffer", 0200, d_tracer,
9968 			  tr, &tracing_free_buffer_fops);
9969 
9970 	trace_create_file("trace_marker", 0220, d_tracer,
9971 			  tr, &tracing_mark_fops);
9972 
9973 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9974 
9975 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9976 			  tr, &tracing_mark_raw_fops);
9977 
9978 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9979 			  &trace_clock_fops);
9980 
9981 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9982 			  tr, &rb_simple_fops);
9983 
9984 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9985 			  &trace_time_stamp_mode_fops);
9986 
9987 	tr->buffer_percent = 50;
9988 
9989 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9990 			tr, &buffer_percent_fops);
9991 
9992 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9993 			  tr, &buffer_subbuf_size_fops);
9994 
9995 	create_trace_options_dir(tr);
9996 
9997 #ifdef CONFIG_TRACER_MAX_TRACE
9998 	trace_create_maxlat_file(tr, d_tracer);
9999 #endif
10000 
10001 	if (ftrace_create_function_files(tr, d_tracer))
10002 		MEM_FAIL(1, "Could not allocate function filter files");
10003 
10004 #ifdef CONFIG_TRACER_SNAPSHOT
10005 	trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
10006 			  tr, &snapshot_fops);
10007 #endif
10008 
10009 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
10010 			  tr, &tracing_err_log_fops);
10011 
10012 	for_each_tracing_cpu(cpu)
10013 		tracing_init_tracefs_percpu(tr, cpu);
10014 
10015 	ftrace_init_tracefs(tr, d_tracer);
10016 }
10017 
10018 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ignore)
10019 {
10020 	struct vfsmount *mnt;
10021 	struct file_system_type *type;
10022 
10023 	/*
10024 	 * To maintain backward compatibility for tools that mount
10025 	 * debugfs to get to the tracing facility, tracefs is automatically
10026 	 * mounted to the debugfs/tracing directory.
10027 	 */
10028 	type = get_fs_type("tracefs");
10029 	if (!type)
10030 		return NULL;
10031 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
10032 	put_filesystem(type);
10033 	if (IS_ERR(mnt))
10034 		return NULL;
10035 	mntget(mnt);
10036 
10037 	return mnt;
10038 }
10039 
10040 /**
10041  * tracing_init_dentry - initialize top level trace array
10042  *
10043  * This is called when creating files or directories in the tracing
10044  * directory. It is called via fs_initcall() by any of the boot up code
10045  * and returns 0 once the top level tracing directory has been set up.
10046  */
10047 int tracing_init_dentry(void)
10048 {
10049 	struct trace_array *tr = &global_trace;
10050 
10051 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10052 		pr_warn("Tracing disabled due to lockdown\n");
10053 		return -EPERM;
10054 	}
10055 
10056 	/* The top level trace array uses NULL as parent */
10057 	if (tr->dir)
10058 		return 0;
10059 
10060 	if (WARN_ON(!tracefs_initialized()))
10061 		return -ENODEV;
10062 
10063 	/*
10064 	 * As there may still be users that expect the tracing
10065 	 * files to exist in debugfs/tracing, we must automount
10066 	 * the tracefs file system there, so older tools still
10067 	 * work with the newer kernel.
10068 	 */
10069 	tr->dir = debugfs_create_automount("tracing", NULL,
10070 					   trace_automount, NULL);
10071 
10072 	return 0;
10073 }
10074 
10075 extern struct trace_eval_map *__start_ftrace_eval_maps[];
10076 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
10077 
10078 static struct workqueue_struct *eval_map_wq __initdata;
10079 static struct work_struct eval_map_work __initdata;
10080 static struct work_struct tracerfs_init_work __initdata;
10081 
10082 static void __init eval_map_work_func(struct work_struct *work)
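/*
 * Eval maps record the values behind the enum/sizeof() symbols used in
 * event print fmts so that user space parsers can resolve them. Inserting
 * the built-in maps can take a while, so it is pushed off to a workqueue
 * rather than being done inline during early boot.
 */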
10083 {
10084 	int len;
10085 
10086 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10087 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10088 }
10089 
10090 static int __init trace_eval_init(void)
10091 {
10092 	INIT_WORK(&eval_map_work, eval_map_work_func);
10093 
10094 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10095 	if (!eval_map_wq) {
10096 		pr_err("Unable to allocate eval_map_wq\n");
10097 		/* Fall back to doing the work synchronously */
10098 		eval_map_work_func(&eval_map_work);
10099 		return -ENOMEM;
10100 	}
10101 
10102 	queue_work(eval_map_wq, &eval_map_work);
10103 	return 0;
10104 }
10105 
10106 subsys_initcall(trace_eval_init);
10107 
10108 static int __init trace_eval_sync(void)
10109 {
10110 	/* Make sure the eval map updates are finished */
10111 	if (eval_map_wq)
10112 		destroy_workqueue(eval_map_wq);
10113 	return 0;
10114 }
10115 
10116 late_initcall_sync(trace_eval_sync);
10117 
10119 #ifdef CONFIG_MODULES
10120 static void trace_module_add_evals(struct module *mod)
10121 {
10122 	if (!mod->num_trace_evals)
10123 		return;
10124 
10125 	/*
10126 	 * Modules with bad taint do not have events created; do
10127 	 * not bother with their eval (enum) maps either.
10128 	 */
10129 	if (trace_module_has_bad_taint(mod))
10130 		return;
10131 
10132 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10133 }
10134 
10135 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10136 static void trace_module_remove_evals(struct module *mod)
10137 {
10138 	union trace_eval_map_item *map;
10139 	union trace_eval_map_item **last = &trace_eval_maps;
10140 
10141 	if (!mod->num_trace_evals)
10142 		return;
10143 
10144 	mutex_lock(&trace_eval_mutex);
10145 
10146 	map = trace_eval_maps;
10147 
10148 	while (map) {
10149 		if (map->head.mod == mod)
10150 			break;
10151 		map = trace_eval_jmp_to_tail(map);
10152 		last = &map->tail.next;
10153 		map = map->tail.next;
10154 	}
10155 	if (!map)
10156 		goto out;
10157 
10158 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10159 	kfree(map);
10160  out:
10161 	mutex_unlock(&trace_eval_mutex);
10162 }
10163 #else
10164 static inline void trace_module_remove_evals(struct module *mod) { }
10165 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10166 
10167 static int trace_module_notify(struct notifier_block *self,
10168 			       unsigned long val, void *data)
10169 {
10170 	struct module *mod = data;
10171 
10172 	switch (val) {
10173 	case MODULE_STATE_COMING:
10174 		trace_module_add_evals(mod);
10175 		break;
10176 	case MODULE_STATE_GOING:
10177 		trace_module_remove_evals(mod);
10178 		break;
10179 	}
10180 
10181 	return NOTIFY_OK;
10182 }
10183 
10184 static struct notifier_block trace_module_nb = {
10185 	.notifier_call = trace_module_notify,
10186 	.priority = 0,
10187 };
10188 #endif /* CONFIG_MODULES */
10189 
10190 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10191 {
10193 	event_trace_init();
10194 
10195 	init_tracer_tracefs(&global_trace, NULL);
10196 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10197 
10198 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10199 			&global_trace, &tracing_thresh_fops);
10200 
10201 	trace_create_file("README", TRACE_MODE_READ, NULL,
10202 			NULL, &tracing_readme_fops);
10203 
10204 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10205 			NULL, &tracing_saved_cmdlines_fops);
10206 
10207 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10208 			  NULL, &tracing_saved_cmdlines_size_fops);
10209 
10210 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10211 			NULL, &tracing_saved_tgids_fops);
10212 
10213 	trace_create_eval_file(NULL);
10214 
10215 #ifdef CONFIG_MODULES
10216 	register_module_notifier(&trace_module_nb);
10217 #endif
10218 
10219 #ifdef CONFIG_DYNAMIC_FTRACE
10220 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10221 			NULL, &tracing_dyn_info_fops);
10222 #endif
10223 
10224 	create_trace_instances(NULL);
10225 
10226 	update_tracer_options(&global_trace);
10227 }
10228 
10229 static __init int tracer_init_tracefs(void)
10230 {
10231 	int ret;
10232 
10233 	trace_access_lock_init();
10234 
10235 	ret = tracing_init_dentry();
10236 	if (ret)
10237 		return 0;
10238 
10239 	if (eval_map_wq) {
10240 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10241 		queue_work(eval_map_wq, &tracerfs_init_work);
10242 	} else {
10243 		tracer_init_tracefs_work_func(NULL);
10244 	}
10245 
10246 	rv_init_interface();
10247 
10248 	return 0;
10249 }
10250 
10251 fs_initcall(tracer_init_tracefs);
10252 
10253 static int trace_die_panic_handler(struct notifier_block *self,
10254 				unsigned long ev, void *unused);
10255 
10256 static struct notifier_block trace_panic_notifier = {
10257 	.notifier_call = trace_die_panic_handler,
10258 	.priority = INT_MAX - 1,
10259 };
10260 
10261 static struct notifier_block trace_die_notifier = {
10262 	.notifier_call = trace_die_panic_handler,
10263 	.priority = INT_MAX - 1,
10264 };
10265 
10266 /*
10267  * The idea is to execute the following die/panic callback early, in order
10268  * to avoid showing irrelevant information in the trace (like other panic
10269  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10270  * warnings get disabled (to prevent potential log flooding).
10271  */
10272 static int trace_die_panic_handler(struct notifier_block *self,
10273 				unsigned long ev, void *unused)
10274 {
10275 	if (!ftrace_dump_on_oops)
10276 		return NOTIFY_DONE;
10277 
10278 	/* The die notifier requires DIE_OOPS to trigger */
10279 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10280 		return NOTIFY_DONE;
10281 
10282 	ftrace_dump(ftrace_dump_on_oops);
10283 
10284 	return NOTIFY_DONE;
10285 }
10286 
10287 /*
10288  * printk is set to a max of 1024; we really don't need it that big.
10289  * Nothing should be printing 1000 characters anyway.
10290  */
10291 #define TRACE_MAX_PRINT		1000
10292 
10293 /*
10294  * Define here KERN_TRACE so that we have one place to modify
10295  * it if we decide to change what log level the ftrace dump
10296  * should be at.
10297  */
10298 #define KERN_TRACE		KERN_EMERG
10299 
10300 void
10301 trace_printk_seq(struct trace_seq *s)
10302 {
10303 	/* Probably should print a warning here. */
10304 	if (s->seq.len >= TRACE_MAX_PRINT)
10305 		s->seq.len = TRACE_MAX_PRINT;
10306 
10307 	/*
10308 	 * More paranoid code. Although the buffer size is set to
10309 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10310 	 * an extra layer of protection.
10311 	 */
10312 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10313 		s->seq.len = s->seq.size - 1;
10314 
10315 	/* should be zero terminated, but we are paranoid. */
10316 	s->buffer[s->seq.len] = 0;
10317 
10318 	printk(KERN_TRACE "%s", s->buffer);
10319 
10320 	trace_seq_init(s);
10321 }
10322 
10323 void trace_init_global_iter(struct trace_iterator *iter)
10324 {
10325 	iter->tr = &global_trace;
10326 	iter->trace = iter->tr->current_trace;
10327 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10328 	iter->array_buffer = &global_trace.array_buffer;
10329 
10330 	if (iter->trace && iter->trace->open)
10331 		iter->trace->open(iter);
10332 
10333 	/* Annotate start of buffers if we had overruns */
10334 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10335 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10336 
10337 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10338 	if (trace_clocks[iter->tr->clock_id].in_ns)
10339 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10340 
10341 	/* Cannot use kmalloc for iter.temp and iter.fmt */
10342 	iter->temp = static_temp_buf;
10343 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10344 	iter->fmt = static_fmt_buf;
10345 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10346 }
10347 
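/*
 * Dump the contents of the ring buffer(s) to the console, typically on an
 * oops or panic (see ftrace_dump_on_oops). DUMP_ALL prints every CPU's
 * buffer, DUMP_ORIG only the buffer of the CPU that triggered the dump.
 */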
10348 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10349 {
10350 	/* use static because iter can be a bit big for the stack */
10351 	static struct trace_iterator iter;
10352 	static atomic_t dump_running;
10353 	struct trace_array *tr = &global_trace;
10354 	unsigned int old_userobj;
10355 	unsigned long flags;
10356 	int cnt = 0, cpu;
10357 
10358 	/* Only allow one dump user at a time. */
10359 	if (atomic_inc_return(&dump_running) != 1) {
10360 		atomic_dec(&dump_running);
10361 		return;
10362 	}
10363 
10364 	/*
10365 	 * Always turn off tracing when we dump.
10366 	 * We don't need to show trace output of what happens
10367 	 * between multiple crashes.
10368 	 *
10369 	 * If the user does a sysrq-z, then they can re-enable
10370 	 * tracing with echo 1 > tracing_on.
10371 	 */
10372 	tracing_off();
10373 
10374 	local_irq_save(flags);
10375 
10376 	/* Simulate the iterator */
10377 	trace_init_global_iter(&iter);
10378 
10379 	for_each_tracing_cpu(cpu) {
10380 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10381 	}
10382 
10383 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10384 
10385 	/* don't look at user memory in panic mode */
10386 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10387 
10388 	switch (oops_dump_mode) {
10389 	case DUMP_ALL:
10390 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10391 		break;
10392 	case DUMP_ORIG:
10393 		iter.cpu_file = raw_smp_processor_id();
10394 		break;
10395 	case DUMP_NONE:
10396 		goto out_enable;
10397 	default:
10398 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10399 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10400 	}
10401 
10402 	printk(KERN_TRACE "Dumping ftrace buffer:\n");
10403 
10404 	/* Did function tracer already get disabled? */
10405 	if (ftrace_is_dead()) {
10406 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10407 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10408 	}
10409 
10410 	/*
10411 	 * We need to stop all tracing on all CPUs to read
10412 	 * the next buffer. This is a bit expensive, but is
10413 	 * not done often. We fill in all that we can read,
10414 	 * and then release the locks again.
10415 	 */
10416 
10417 	while (!trace_empty(&iter)) {
10418 
10419 		if (!cnt)
10420 			printk(KERN_TRACE "---------------------------------\n");
10421 
10422 		cnt++;
10423 
10424 		trace_iterator_reset(&iter);
10425 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10426 
10427 		if (trace_find_next_entry_inc(&iter) != NULL) {
10428 			int ret;
10429 
10430 			ret = print_trace_line(&iter);
10431 			if (ret != TRACE_TYPE_NO_CONSUME)
10432 				trace_consume(&iter);
10433 		}
10434 		touch_nmi_watchdog();
10435 
10436 		trace_printk_seq(&iter.seq);
10437 	}
10438 
10439 	if (!cnt)
10440 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10441 	else
10442 		printk(KERN_TRACE "---------------------------------\n");
10443 
10444  out_enable:
10445 	tr->trace_flags |= old_userobj;
10446 
10447 	for_each_tracing_cpu(cpu) {
10448 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10449 	}
10450 	atomic_dec(&dump_running);
10451 	local_irq_restore(flags);
10452 }
10453 EXPORT_SYMBOL_GPL(ftrace_dump);
10454 
10455 #define WRITE_BUFSIZE  4096
10456 
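/*
 * Generic write handler for "command" style tracefs files (the dynamic
 * event interfaces and similar): copy the user buffer in WRITE_BUFSIZE
 * chunks, split it on newlines, strip '#' comments, and hand each complete
 * line to @createfn.
 */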
10457 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10458 				size_t count, loff_t *ppos,
10459 				int (*createfn)(const char *))
10460 {
10461 	char *kbuf, *buf, *tmp;
10462 	int ret = 0;
10463 	size_t done = 0;
10464 	size_t size;
10465 
10466 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10467 	if (!kbuf)
10468 		return -ENOMEM;
10469 
10470 	while (done < count) {
10471 		size = count - done;
10472 
10473 		if (size >= WRITE_BUFSIZE)
10474 			size = WRITE_BUFSIZE - 1;
10475 
10476 		if (copy_from_user(kbuf, buffer + done, size)) {
10477 			ret = -EFAULT;
10478 			goto out;
10479 		}
10480 		kbuf[size] = '\0';
10481 		buf = kbuf;
10482 		do {
10483 			tmp = strchr(buf, '\n');
10484 			if (tmp) {
10485 				*tmp = '\0';
10486 				size = tmp - buf + 1;
10487 			} else {
10488 				size = strlen(buf);
10489 				if (done + size < count) {
10490 					if (buf != kbuf)
10491 						break;
10492 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10493 					pr_warn("Line length is too long: Should be less than %d\n",
10494 						WRITE_BUFSIZE - 2);
10495 					ret = -EINVAL;
10496 					goto out;
10497 				}
10498 			}
10499 			done += size;
10500 
10501 			/* Remove comments */
10502 			tmp = strchr(buf, '#');
10503 
10504 			if (tmp)
10505 				*tmp = '\0';
10506 
10507 			ret = createfn(buf);
10508 			if (ret)
10509 				goto out;
10510 			buf += size;
10511 
10512 		} while (done < count);
10513 	}
10514 	ret = done;
10515 
10516 out:
10517 	kfree(kbuf);
10518 
10519 	return ret;
10520 }
10521 
10522 #ifdef CONFIG_TRACER_MAX_TRACE
10523 __init static bool tr_needs_alloc_snapshot(const char *name)
10524 {
10525 	char *test;
10526 	int len = strlen(name);
10527 	bool ret;
10528 
10529 	if (!boot_snapshot_index)
10530 		return false;
10531 
10532 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10533 	    boot_snapshot_info[len] == '\t')
10534 		return true;
10535 
10536 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10537 	if (!test)
10538 		return false;
10539 
10540 	sprintf(test, "\t%s\t", name);
10541 	ret = strstr(boot_snapshot_info, test) == NULL;
10542 	kfree(test);
10543 	return ret;
10544 }
10545 
10546 __init static void do_allocate_snapshot(const char *name)
10547 {
10548 	if (!tr_needs_alloc_snapshot(name))
10549 		return;
10550 
10551 	/*
10552 	 * When allocate_snapshot is set, the next call to
10553 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10554 	 * will allocate the snapshot buffer. That will also clear
10555 	 * this flag.
10556 	 */
10557 	allocate_snapshot = true;
10558 }
10559 #else
10560 static inline void do_allocate_snapshot(const char *name) { }
10561 #endif
10562 
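/*
 * Create the instances requested on the kernel command line (populated
 * into boot_instance_info by the trace_instance= option). Each tab
 * separated entry has the form "name[,event...]"; for example,
 * "trace_instance=foo,sched:sched_switch" creates instance "foo" and
 * enables the sched_switch event in it.
 */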
10563 __init static void enable_instances(void)
10564 {
10565 	struct trace_array *tr;
10566 	char *curr_str;
10567 	char *str;
10568 	char *tok;
10569 
10570 	/* A tab is always appended */
10571 	boot_instance_info[boot_instance_index - 1] = '\0';
10572 	str = boot_instance_info;
10573 
10574 	while ((curr_str = strsep(&str, "\t"))) {
10575 
10576 		tok = strsep(&curr_str, ",");
10577 
10578 		if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10579 			do_allocate_snapshot(tok);
10580 
10581 		tr = trace_array_get_by_name(tok, NULL);
10582 		if (!tr) {
10583 			pr_warn("Failed to create instance buffer %s\n", tok);
10584 			continue;
10585 		}
10586 		/* Allow user space to delete it */
10587 		trace_array_put(tr);
10588 
10589 		while ((tok = strsep(&curr_str, ","))) {
10590 			early_enable_events(tr, tok, true);
10591 		}
10592 	}
10593 }
10594 
10595 __init static int tracer_alloc_buffers(void)
10596 {
10597 	int ring_buf_size;
10598 	int ret = -ENOMEM;
10599 
10601 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10602 		pr_warn("Tracing disabled due to lockdown\n");
10603 		return -EPERM;
10604 	}
10605 
10606 	/*
10607 	 * Make sure we don't accidentally add more trace options
10608 	 * than we have bits for.
10609 	 */
10610 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10611 
10612 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10613 		goto out;
10614 
10615 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10616 		goto out_free_buffer_mask;
10617 
10618 	/* Only allocate trace_printk buffers if a trace_printk exists */
10619 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10620 		/* Must be called before global_trace.buffer is allocated */
10621 		trace_printk_init_buffers();
10622 
10623 	/* To save memory, keep the ring buffer size at its minimum */
10624 	if (global_trace.ring_buffer_expanded)
10625 		ring_buf_size = trace_buf_size;
10626 	else
10627 		ring_buf_size = 1;
10628 
10629 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10630 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10631 
10632 	raw_spin_lock_init(&global_trace.start_lock);
10633 
10634 	/*
10635 	 * The prepare callback allocates some memory for the ring buffer. We
10636 	 * don't free the buffer if the CPU goes down. If we were to free
10637 	 * the buffer, then the user would lose any trace that was in the
10638 	 * buffer. The memory will be removed once the "instance" is removed.
10639 	 */
10640 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10641 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10642 				      NULL);
10643 	if (ret < 0)
10644 		goto out_free_cpumask;
10645 	/* Used for event triggers */
10646 	ret = -ENOMEM;
10647 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10648 	if (!temp_buffer)
10649 		goto out_rm_hp_state;
10650 
10651 	if (trace_create_savedcmd() < 0)
10652 		goto out_free_temp_buffer;
10653 
10654 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10655 		goto out_free_savedcmd;
10656 
10657 	/* TODO: make the number of buffers hot pluggable with CPUs */
10658 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10659 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10660 		goto out_free_pipe_cpumask;
10661 	}
10662 	if (global_trace.buffer_disabled)
10663 		tracing_off();
10664 
10665 	if (trace_boot_clock) {
10666 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10667 		if (ret < 0)
10668 			pr_warn("Trace clock %s not defined, going back to default\n",
10669 				trace_boot_clock);
10670 	}
10671 
10672 	/*
10673 	 * register_tracer() might reference current_trace, so it
10674 	 * needs to be set before we register anything. This is
10675 	 * just a bootstrap of current_trace anyway.
10676 	 */
10677 	global_trace.current_trace = &nop_trace;
10678 
10679 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10680 
10681 	ftrace_init_global_array_ops(&global_trace);
10682 
10683 	init_trace_flags_index(&global_trace);
10684 
10685 	register_tracer(&nop_trace);
10686 
10687 	/* Function tracing may start here (via kernel command line) */
10688 	init_function_trace();
10689 
10690 	/* All seems OK, enable tracing */
10691 	tracing_disabled = 0;
10692 
10693 	atomic_notifier_chain_register(&panic_notifier_list,
10694 				       &trace_panic_notifier);
10695 
10696 	register_die_notifier(&trace_die_notifier);
10697 
10698 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10699 
10700 	INIT_LIST_HEAD(&global_trace.systems);
10701 	INIT_LIST_HEAD(&global_trace.events);
10702 	INIT_LIST_HEAD(&global_trace.hist_vars);
10703 	INIT_LIST_HEAD(&global_trace.err_log);
10704 	list_add(&global_trace.list, &ftrace_trace_arrays);
10705 
10706 	apply_trace_boot_options();
10707 
10708 	register_snapshot_cmd();
10709 
10710 	test_can_verify();
10711 
10712 	return 0;
10713 
10714 out_free_pipe_cpumask:
10715 	free_cpumask_var(global_trace.pipe_cpumask);
10716 out_free_savedcmd:
10717 	free_saved_cmdlines_buffer(savedcmd);
10718 out_free_temp_buffer:
10719 	ring_buffer_free(temp_buffer);
10720 out_rm_hp_state:
10721 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10722 out_free_cpumask:
10723 	free_cpumask_var(global_trace.tracing_cpumask);
10724 out_free_buffer_mask:
10725 	free_cpumask_var(tracing_buffer_mask);
10726 out:
10727 	return ret;
10728 }
10729 
10730 void __init ftrace_boot_snapshot(void)
10731 {
10732 #ifdef CONFIG_TRACER_MAX_TRACE
10733 	struct trace_array *tr;
10734 
10735 	if (!snapshot_at_boot)
10736 		return;
10737 
10738 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10739 		if (!tr->allocated_snapshot)
10740 			continue;
10741 
10742 		tracing_snapshot_instance(tr);
10743 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10744 	}
10745 #endif
10746 }
10747 
10748 void __init early_trace_init(void)
10749 {
10750 	if (tracepoint_printk) {
10751 		tracepoint_print_iter =
10752 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10753 		if (MEM_FAIL(!tracepoint_print_iter,
10754 			     "Failed to allocate trace iterator\n"))
10755 			tracepoint_printk = 0;
10756 		else
10757 			static_key_enable(&tracepoint_printk_key.key);
10758 	}
10759 	tracer_alloc_buffers();
10760 
10761 	init_events();
10762 }
10763 
10764 void __init trace_init(void)
10765 {
10766 	trace_event_init();
10767 
10768 	if (boot_instance_index)
10769 		enable_instances();
10770 }
10771 
10772 __init static void clear_boot_tracer(void)
10773 {
10774 	 * The default bootup tracer name points into an init section
10775 	 * (the boot command line buffer). This function is called at
10776 	 * late init; if the boot tracer was not found by then, clear
10777 	 * the pointer to prevent later registration from accessing a
10778 	 * buffer that is about to be freed.
10779 	 * about to be freed.
10780 	 */
10781 	if (!default_bootup_tracer)
10782 		return;
10783 
10784 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10785 	       default_bootup_tracer);
10786 	default_bootup_tracer = NULL;
10787 }
10788 
10789 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10790 __init static void tracing_set_default_clock(void)
10791 {
10792 	/* sched_clock_stable() is determined in late_initcall */
10793 	if (!trace_boot_clock && !sched_clock_stable()) {
10794 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10795 			pr_warn("Can not set tracing clock due to lockdown\n");
10796 			return;
10797 		}
10798 
10799 		printk(KERN_WARNING
10800 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10801 		       "If you want to keep using the local clock, then add:\n"
10802 		       "  \"trace_clock=local\"\n"
10803 		       "on the kernel command line\n");
10804 		tracing_set_clock(&global_trace, "global");
10805 	}
10806 }
10807 #else
10808 static inline void tracing_set_default_clock(void) { }
10809 #endif
10810 
10811 __init static int late_trace_init(void)
10812 {
10813 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10814 		static_key_disable(&tracepoint_printk_key.key);
10815 		tracepoint_printk = 0;
10816 	}
10817 
10818 	tracing_set_default_clock();
10819 	clear_boot_tracer();
10820 	return 0;
10821 }
10822 
10823 late_initcall_sync(late_trace_init);
10824