xref: /linux/kernel/trace/trace.c (revision 572af9f284669d31d9175122bbef9bc62cea8ded)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * ring buffer based function tracer
4  *
5  * Copyright (C) 2007-2012 Steven Rostedt <srostedt@redhat.com>
6  * Copyright (C) 2008 Ingo Molnar <mingo@redhat.com>
7  *
8  * Originally taken from the RT patch by:
9  *    Arnaldo Carvalho de Melo <acme@redhat.com>
10  *
11  * Based on code from the latency_tracer, that is:
12  *  Copyright (C) 2004-2006 Ingo Molnar
13  *  Copyright (C) 2004 Nadia Yvette Chambers
14  */
15 #include <linux/ring_buffer.h>
16 #include <linux/utsname.h>
17 #include <linux/stacktrace.h>
18 #include <linux/writeback.h>
19 #include <linux/kallsyms.h>
20 #include <linux/security.h>
21 #include <linux/seq_file.h>
22 #include <linux/irqflags.h>
23 #include <linux/debugfs.h>
24 #include <linux/tracefs.h>
25 #include <linux/pagemap.h>
26 #include <linux/hardirq.h>
27 #include <linux/linkage.h>
28 #include <linux/uaccess.h>
29 #include <linux/vmalloc.h>
30 #include <linux/ftrace.h>
31 #include <linux/module.h>
32 #include <linux/percpu.h>
33 #include <linux/splice.h>
34 #include <linux/kdebug.h>
35 #include <linux/string.h>
36 #include <linux/mount.h>
37 #include <linux/rwsem.h>
38 #include <linux/slab.h>
39 #include <linux/ctype.h>
40 #include <linux/init.h>
41 #include <linux/panic_notifier.h>
42 #include <linux/poll.h>
43 #include <linux/nmi.h>
44 #include <linux/fs.h>
45 #include <linux/trace.h>
46 #include <linux/sched/clock.h>
47 #include <linux/sched/rt.h>
48 #include <linux/fsnotify.h>
49 #include <linux/irq_work.h>
50 #include <linux/workqueue.h>
51 
52 #include <asm/setup.h> /* COMMAND_LINE_SIZE */
53 
54 #include "trace.h"
55 #include "trace_output.h"
56 
57 #ifdef CONFIG_FTRACE_STARTUP_TEST
58 /*
59  * We need to change this state when a selftest is running.
60  * A selftest will lurk into the ring-buffer to count the
61  * entries inserted during the selftest although some concurrent
62  * insertions into the ring-buffer such as trace_printk could occurred
63  * at the same time, giving false positive or negative results.
64  */
65 static bool __read_mostly tracing_selftest_running;
66 
67 /*
68  * If boot-time tracing including tracers/events via kernel cmdline
69  * is running, we do not want to run SELFTEST.
70  */
71 bool __read_mostly tracing_selftest_disabled;
72 
73 void __init disable_tracing_selftest(const char *reason)
74 {
75 	if (!tracing_selftest_disabled) {
76 		tracing_selftest_disabled = true;
77 		pr_info("Ftrace startup test is disabled due to %s\n", reason);
78 	}
79 }
80 #else
81 #define tracing_selftest_running	0
82 #define tracing_selftest_disabled	0
83 #endif
84 
85 /* Pipe tracepoints to printk */
86 static struct trace_iterator *tracepoint_print_iter;
87 int tracepoint_printk;
88 static bool tracepoint_printk_stop_on_boot __initdata;
89 static DEFINE_STATIC_KEY_FALSE(tracepoint_printk_key);
90 
91 /* For tracers that don't implement custom flags */
92 static struct tracer_opt dummy_tracer_opt[] = {
93 	{ }
94 };
95 
96 static int
97 dummy_set_flag(struct trace_array *tr, u32 old_flags, u32 bit, int set)
98 {
99 	return 0;
100 }
101 
102 /*
103  * To prevent the comm cache from being overwritten when no
104  * tracing is active, only save the comm when a trace event
105  * occurred.
106  */
107 DEFINE_PER_CPU(bool, trace_taskinfo_save);
108 
109 /*
110  * Kill all tracing for good (never come back).
111  * It is initialized to 1 but will turn to zero if the initialization
112  * of the tracer is successful. But that is the only place that sets
113  * this back to zero.
114  */
115 static int tracing_disabled = 1;
116 
117 cpumask_var_t __read_mostly	tracing_buffer_mask;
118 
119 /*
120  * ftrace_dump_on_oops - variable to dump ftrace buffer on oops
121  *
122  * If there is an oops (or kernel panic) and the ftrace_dump_on_oops
123  * is set, then ftrace_dump is called. This will output the contents
124  * of the ftrace buffers to the console.  This is very useful for
125  * capturing traces that lead to crashes and outputing it to a
126  * serial console.
127  *
128  * It is default off, but you can enable it with either specifying
129  * "ftrace_dump_on_oops" in the kernel command line, or setting
130  * /proc/sys/kernel/ftrace_dump_on_oops
131  * Set 1 if you want to dump buffers of all CPUs
132  * Set 2 if you want to dump the buffer of the CPU that triggered oops
133  * Set instance name if you want to dump the specific trace instance
134  * Multiple instance dump is also supported, and instances are seperated
135  * by commas.
136  */
137 /* Set to string format zero to disable by default */
138 char ftrace_dump_on_oops[MAX_TRACER_SIZE] = "0";
139 
140 /* When set, tracing will stop when a WARN*() is hit */
141 int __disable_trace_on_warning;
142 
143 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
144 /* Map of enums to their values, for "eval_map" file */
145 struct trace_eval_map_head {
146 	struct module			*mod;
147 	unsigned long			length;
148 };
149 
150 union trace_eval_map_item;
151 
152 struct trace_eval_map_tail {
153 	/*
154 	 * "end" is first and points to NULL as it must be different
155 	 * than "mod" or "eval_string"
156 	 */
157 	union trace_eval_map_item	*next;
158 	const char			*end;	/* points to NULL */
159 };
160 
161 static DEFINE_MUTEX(trace_eval_mutex);
162 
163 /*
164  * The trace_eval_maps are saved in an array with two extra elements,
165  * one at the beginning, and one at the end. The beginning item contains
166  * the count of the saved maps (head.length), and the module they
167  * belong to if not built in (head.mod). The ending item contains a
168  * pointer to the next array of saved eval_map items.
169  */
170 union trace_eval_map_item {
171 	struct trace_eval_map		map;
172 	struct trace_eval_map_head	head;
173 	struct trace_eval_map_tail	tail;
174 };
175 
176 static union trace_eval_map_item *trace_eval_maps;
177 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
178 
179 int tracing_set_tracer(struct trace_array *tr, const char *buf);
180 static void ftrace_trace_userstack(struct trace_array *tr,
181 				   struct trace_buffer *buffer,
182 				   unsigned int trace_ctx);
183 
184 static char bootup_tracer_buf[MAX_TRACER_SIZE] __initdata;
185 static char *default_bootup_tracer;
186 
187 static bool allocate_snapshot;
188 static bool snapshot_at_boot;
189 
190 static char boot_instance_info[COMMAND_LINE_SIZE] __initdata;
191 static int boot_instance_index;
192 
193 static char boot_snapshot_info[COMMAND_LINE_SIZE] __initdata;
194 static int boot_snapshot_index;
195 
196 static int __init set_cmdline_ftrace(char *str)
197 {
198 	strscpy(bootup_tracer_buf, str, MAX_TRACER_SIZE);
199 	default_bootup_tracer = bootup_tracer_buf;
200 	/* We are using ftrace early, expand it */
201 	trace_set_ring_buffer_expanded(NULL);
202 	return 1;
203 }
204 __setup("ftrace=", set_cmdline_ftrace);
205 
206 int ftrace_dump_on_oops_enabled(void)
207 {
208 	if (!strcmp("0", ftrace_dump_on_oops))
209 		return 0;
210 	else
211 		return 1;
212 }
213 
214 static int __init set_ftrace_dump_on_oops(char *str)
215 {
216 	if (!*str) {
217 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
218 		return 1;
219 	}
220 
221 	if (*str == ',') {
222 		strscpy(ftrace_dump_on_oops, "1", MAX_TRACER_SIZE);
223 		strscpy(ftrace_dump_on_oops + 1, str, MAX_TRACER_SIZE - 1);
224 		return 1;
225 	}
226 
227 	if (*str++ == '=') {
228 		strscpy(ftrace_dump_on_oops, str, MAX_TRACER_SIZE);
229 		return 1;
230 	}
231 
232 	return 0;
233 }
234 __setup("ftrace_dump_on_oops", set_ftrace_dump_on_oops);
235 
236 static int __init stop_trace_on_warning(char *str)
237 {
238 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
239 		__disable_trace_on_warning = 1;
240 	return 1;
241 }
242 __setup("traceoff_on_warning", stop_trace_on_warning);
243 
244 static int __init boot_alloc_snapshot(char *str)
245 {
246 	char *slot = boot_snapshot_info + boot_snapshot_index;
247 	int left = sizeof(boot_snapshot_info) - boot_snapshot_index;
248 	int ret;
249 
250 	if (str[0] == '=') {
251 		str++;
252 		if (strlen(str) >= left)
253 			return -1;
254 
255 		ret = snprintf(slot, left, "%s\t", str);
256 		boot_snapshot_index += ret;
257 	} else {
258 		allocate_snapshot = true;
259 		/* We also need the main ring buffer expanded */
260 		trace_set_ring_buffer_expanded(NULL);
261 	}
262 	return 1;
263 }
264 __setup("alloc_snapshot", boot_alloc_snapshot);
265 
266 
267 static int __init boot_snapshot(char *str)
268 {
269 	snapshot_at_boot = true;
270 	boot_alloc_snapshot(str);
271 	return 1;
272 }
273 __setup("ftrace_boot_snapshot", boot_snapshot);
274 
275 
276 static int __init boot_instance(char *str)
277 {
278 	char *slot = boot_instance_info + boot_instance_index;
279 	int left = sizeof(boot_instance_info) - boot_instance_index;
280 	int ret;
281 
282 	if (strlen(str) >= left)
283 		return -1;
284 
285 	ret = snprintf(slot, left, "%s\t", str);
286 	boot_instance_index += ret;
287 
288 	return 1;
289 }
290 __setup("trace_instance=", boot_instance);
291 
292 
293 static char trace_boot_options_buf[MAX_TRACER_SIZE] __initdata;
294 
295 static int __init set_trace_boot_options(char *str)
296 {
297 	strscpy(trace_boot_options_buf, str, MAX_TRACER_SIZE);
298 	return 1;
299 }
300 __setup("trace_options=", set_trace_boot_options);
301 
302 static char trace_boot_clock_buf[MAX_TRACER_SIZE] __initdata;
303 static char *trace_boot_clock __initdata;
304 
305 static int __init set_trace_boot_clock(char *str)
306 {
307 	strscpy(trace_boot_clock_buf, str, MAX_TRACER_SIZE);
308 	trace_boot_clock = trace_boot_clock_buf;
309 	return 1;
310 }
311 __setup("trace_clock=", set_trace_boot_clock);
312 
313 static int __init set_tracepoint_printk(char *str)
314 {
315 	/* Ignore the "tp_printk_stop_on_boot" param */
316 	if (*str == '_')
317 		return 0;
318 
319 	if ((strcmp(str, "=0") != 0 && strcmp(str, "=off") != 0))
320 		tracepoint_printk = 1;
321 	return 1;
322 }
323 __setup("tp_printk", set_tracepoint_printk);
324 
325 static int __init set_tracepoint_printk_stop(char *str)
326 {
327 	tracepoint_printk_stop_on_boot = true;
328 	return 1;
329 }
330 __setup("tp_printk_stop_on_boot", set_tracepoint_printk_stop);
331 
332 unsigned long long ns2usecs(u64 nsec)
333 {
334 	nsec += 500;
335 	do_div(nsec, 1000);
336 	return nsec;
337 }
338 
339 static void
340 trace_process_export(struct trace_export *export,
341 	       struct ring_buffer_event *event, int flag)
342 {
343 	struct trace_entry *entry;
344 	unsigned int size = 0;
345 
346 	if (export->flags & flag) {
347 		entry = ring_buffer_event_data(event);
348 		size = ring_buffer_event_length(event);
349 		export->write(export, entry, size);
350 	}
351 }
352 
353 static DEFINE_MUTEX(ftrace_export_lock);
354 
355 static struct trace_export __rcu *ftrace_exports_list __read_mostly;
356 
357 static DEFINE_STATIC_KEY_FALSE(trace_function_exports_enabled);
358 static DEFINE_STATIC_KEY_FALSE(trace_event_exports_enabled);
359 static DEFINE_STATIC_KEY_FALSE(trace_marker_exports_enabled);
360 
361 static inline void ftrace_exports_enable(struct trace_export *export)
362 {
363 	if (export->flags & TRACE_EXPORT_FUNCTION)
364 		static_branch_inc(&trace_function_exports_enabled);
365 
366 	if (export->flags & TRACE_EXPORT_EVENT)
367 		static_branch_inc(&trace_event_exports_enabled);
368 
369 	if (export->flags & TRACE_EXPORT_MARKER)
370 		static_branch_inc(&trace_marker_exports_enabled);
371 }
372 
373 static inline void ftrace_exports_disable(struct trace_export *export)
374 {
375 	if (export->flags & TRACE_EXPORT_FUNCTION)
376 		static_branch_dec(&trace_function_exports_enabled);
377 
378 	if (export->flags & TRACE_EXPORT_EVENT)
379 		static_branch_dec(&trace_event_exports_enabled);
380 
381 	if (export->flags & TRACE_EXPORT_MARKER)
382 		static_branch_dec(&trace_marker_exports_enabled);
383 }
384 
385 static void ftrace_exports(struct ring_buffer_event *event, int flag)
386 {
387 	struct trace_export *export;
388 
389 	preempt_disable_notrace();
390 
391 	export = rcu_dereference_raw_check(ftrace_exports_list);
392 	while (export) {
393 		trace_process_export(export, event, flag);
394 		export = rcu_dereference_raw_check(export->next);
395 	}
396 
397 	preempt_enable_notrace();
398 }
399 
400 static inline void
401 add_trace_export(struct trace_export **list, struct trace_export *export)
402 {
403 	rcu_assign_pointer(export->next, *list);
404 	/*
405 	 * We are entering export into the list but another
406 	 * CPU might be walking that list. We need to make sure
407 	 * the export->next pointer is valid before another CPU sees
408 	 * the export pointer included into the list.
409 	 */
410 	rcu_assign_pointer(*list, export);
411 }
412 
413 static inline int
414 rm_trace_export(struct trace_export **list, struct trace_export *export)
415 {
416 	struct trace_export **p;
417 
418 	for (p = list; *p != NULL; p = &(*p)->next)
419 		if (*p == export)
420 			break;
421 
422 	if (*p != export)
423 		return -1;
424 
425 	rcu_assign_pointer(*p, (*p)->next);
426 
427 	return 0;
428 }
429 
430 static inline void
431 add_ftrace_export(struct trace_export **list, struct trace_export *export)
432 {
433 	ftrace_exports_enable(export);
434 
435 	add_trace_export(list, export);
436 }
437 
438 static inline int
439 rm_ftrace_export(struct trace_export **list, struct trace_export *export)
440 {
441 	int ret;
442 
443 	ret = rm_trace_export(list, export);
444 	ftrace_exports_disable(export);
445 
446 	return ret;
447 }
448 
449 int register_ftrace_export(struct trace_export *export)
450 {
451 	if (WARN_ON_ONCE(!export->write))
452 		return -1;
453 
454 	mutex_lock(&ftrace_export_lock);
455 
456 	add_ftrace_export(&ftrace_exports_list, export);
457 
458 	mutex_unlock(&ftrace_export_lock);
459 
460 	return 0;
461 }
462 EXPORT_SYMBOL_GPL(register_ftrace_export);
463 
464 int unregister_ftrace_export(struct trace_export *export)
465 {
466 	int ret;
467 
468 	mutex_lock(&ftrace_export_lock);
469 
470 	ret = rm_ftrace_export(&ftrace_exports_list, export);
471 
472 	mutex_unlock(&ftrace_export_lock);
473 
474 	return ret;
475 }
476 EXPORT_SYMBOL_GPL(unregister_ftrace_export);
477 
478 /* trace_flags holds trace_options default values */
479 #define TRACE_DEFAULT_FLAGS						\
480 	(FUNCTION_DEFAULT_FLAGS |					\
481 	 TRACE_ITER_PRINT_PARENT | TRACE_ITER_PRINTK |			\
482 	 TRACE_ITER_ANNOTATE | TRACE_ITER_CONTEXT_INFO |		\
483 	 TRACE_ITER_RECORD_CMD | TRACE_ITER_OVERWRITE |			\
484 	 TRACE_ITER_IRQ_INFO | TRACE_ITER_MARKERS |			\
485 	 TRACE_ITER_HASH_PTR | TRACE_ITER_TRACE_PRINTK)
486 
487 /* trace_options that are only supported by global_trace */
488 #define TOP_LEVEL_TRACE_FLAGS (TRACE_ITER_PRINTK |			\
489 	       TRACE_ITER_PRINTK_MSGONLY | TRACE_ITER_RECORD_CMD)
490 
491 /* trace_flags that are default zero for instances */
492 #define ZEROED_TRACE_FLAGS \
493 	(TRACE_ITER_EVENT_FORK | TRACE_ITER_FUNC_FORK | TRACE_ITER_TRACE_PRINTK)
494 
495 /*
496  * The global_trace is the descriptor that holds the top-level tracing
497  * buffers for the live tracing.
498  */
499 static struct trace_array global_trace = {
500 	.trace_flags = TRACE_DEFAULT_FLAGS,
501 };
502 
503 static struct trace_array *printk_trace = &global_trace;
504 
505 static __always_inline bool printk_binsafe(struct trace_array *tr)
506 {
507 	/*
508 	 * The binary format of traceprintk can cause a crash if used
509 	 * by a buffer from another boot. Force the use of the
510 	 * non binary version of trace_printk if the trace_printk
511 	 * buffer is a boot mapped ring buffer.
512 	 */
513 	return !(tr->flags & TRACE_ARRAY_FL_BOOT);
514 }
515 
516 static void update_printk_trace(struct trace_array *tr)
517 {
518 	if (printk_trace == tr)
519 		return;
520 
521 	printk_trace->trace_flags &= ~TRACE_ITER_TRACE_PRINTK;
522 	printk_trace = tr;
523 	tr->trace_flags |= TRACE_ITER_TRACE_PRINTK;
524 }
525 
526 void trace_set_ring_buffer_expanded(struct trace_array *tr)
527 {
528 	if (!tr)
529 		tr = &global_trace;
530 	tr->ring_buffer_expanded = true;
531 }
532 
533 LIST_HEAD(ftrace_trace_arrays);
534 
535 int trace_array_get(struct trace_array *this_tr)
536 {
537 	struct trace_array *tr;
538 	int ret = -ENODEV;
539 
540 	mutex_lock(&trace_types_lock);
541 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
542 		if (tr == this_tr) {
543 			tr->ref++;
544 			ret = 0;
545 			break;
546 		}
547 	}
548 	mutex_unlock(&trace_types_lock);
549 
550 	return ret;
551 }
552 
553 static void __trace_array_put(struct trace_array *this_tr)
554 {
555 	WARN_ON(!this_tr->ref);
556 	this_tr->ref--;
557 }
558 
559 /**
560  * trace_array_put - Decrement the reference counter for this trace array.
561  * @this_tr : pointer to the trace array
562  *
563  * NOTE: Use this when we no longer need the trace array returned by
564  * trace_array_get_by_name(). This ensures the trace array can be later
565  * destroyed.
566  *
567  */
568 void trace_array_put(struct trace_array *this_tr)
569 {
570 	if (!this_tr)
571 		return;
572 
573 	mutex_lock(&trace_types_lock);
574 	__trace_array_put(this_tr);
575 	mutex_unlock(&trace_types_lock);
576 }
577 EXPORT_SYMBOL_GPL(trace_array_put);
578 
579 int tracing_check_open_get_tr(struct trace_array *tr)
580 {
581 	int ret;
582 
583 	ret = security_locked_down(LOCKDOWN_TRACEFS);
584 	if (ret)
585 		return ret;
586 
587 	if (tracing_disabled)
588 		return -ENODEV;
589 
590 	if (tr && trace_array_get(tr) < 0)
591 		return -ENODEV;
592 
593 	return 0;
594 }
595 
596 /**
597  * trace_find_filtered_pid - check if a pid exists in a filtered_pid list
598  * @filtered_pids: The list of pids to check
599  * @search_pid: The PID to find in @filtered_pids
600  *
601  * Returns true if @search_pid is found in @filtered_pids, and false otherwise.
602  */
603 bool
604 trace_find_filtered_pid(struct trace_pid_list *filtered_pids, pid_t search_pid)
605 {
606 	return trace_pid_list_is_set(filtered_pids, search_pid);
607 }
608 
609 /**
610  * trace_ignore_this_task - should a task be ignored for tracing
611  * @filtered_pids: The list of pids to check
612  * @filtered_no_pids: The list of pids not to be traced
613  * @task: The task that should be ignored if not filtered
614  *
615  * Checks if @task should be traced or not from @filtered_pids.
616  * Returns true if @task should *NOT* be traced.
617  * Returns false if @task should be traced.
618  */
619 bool
620 trace_ignore_this_task(struct trace_pid_list *filtered_pids,
621 		       struct trace_pid_list *filtered_no_pids,
622 		       struct task_struct *task)
623 {
624 	/*
625 	 * If filtered_no_pids is not empty, and the task's pid is listed
626 	 * in filtered_no_pids, then return true.
627 	 * Otherwise, if filtered_pids is empty, that means we can
628 	 * trace all tasks. If it has content, then only trace pids
629 	 * within filtered_pids.
630 	 */
631 
632 	return (filtered_pids &&
633 		!trace_find_filtered_pid(filtered_pids, task->pid)) ||
634 		(filtered_no_pids &&
635 		 trace_find_filtered_pid(filtered_no_pids, task->pid));
636 }
637 
638 /**
639  * trace_filter_add_remove_task - Add or remove a task from a pid_list
640  * @pid_list: The list to modify
641  * @self: The current task for fork or NULL for exit
642  * @task: The task to add or remove
643  *
644  * If adding a task, if @self is defined, the task is only added if @self
645  * is also included in @pid_list. This happens on fork and tasks should
646  * only be added when the parent is listed. If @self is NULL, then the
647  * @task pid will be removed from the list, which would happen on exit
648  * of a task.
649  */
650 void trace_filter_add_remove_task(struct trace_pid_list *pid_list,
651 				  struct task_struct *self,
652 				  struct task_struct *task)
653 {
654 	if (!pid_list)
655 		return;
656 
657 	/* For forks, we only add if the forking task is listed */
658 	if (self) {
659 		if (!trace_find_filtered_pid(pid_list, self->pid))
660 			return;
661 	}
662 
663 	/* "self" is set for forks, and NULL for exits */
664 	if (self)
665 		trace_pid_list_set(pid_list, task->pid);
666 	else
667 		trace_pid_list_clear(pid_list, task->pid);
668 }
669 
670 /**
671  * trace_pid_next - Used for seq_file to get to the next pid of a pid_list
672  * @pid_list: The pid list to show
673  * @v: The last pid that was shown (+1 the actual pid to let zero be displayed)
674  * @pos: The position of the file
675  *
676  * This is used by the seq_file "next" operation to iterate the pids
677  * listed in a trace_pid_list structure.
678  *
679  * Returns the pid+1 as we want to display pid of zero, but NULL would
680  * stop the iteration.
681  */
682 void *trace_pid_next(struct trace_pid_list *pid_list, void *v, loff_t *pos)
683 {
684 	long pid = (unsigned long)v;
685 	unsigned int next;
686 
687 	(*pos)++;
688 
689 	/* pid already is +1 of the actual previous bit */
690 	if (trace_pid_list_next(pid_list, pid, &next) < 0)
691 		return NULL;
692 
693 	pid = next;
694 
695 	/* Return pid + 1 to allow zero to be represented */
696 	return (void *)(pid + 1);
697 }
698 
699 /**
700  * trace_pid_start - Used for seq_file to start reading pid lists
701  * @pid_list: The pid list to show
702  * @pos: The position of the file
703  *
704  * This is used by seq_file "start" operation to start the iteration
705  * of listing pids.
706  *
707  * Returns the pid+1 as we want to display pid of zero, but NULL would
708  * stop the iteration.
709  */
710 void *trace_pid_start(struct trace_pid_list *pid_list, loff_t *pos)
711 {
712 	unsigned long pid;
713 	unsigned int first;
714 	loff_t l = 0;
715 
716 	if (trace_pid_list_first(pid_list, &first) < 0)
717 		return NULL;
718 
719 	pid = first;
720 
721 	/* Return pid + 1 so that zero can be the exit value */
722 	for (pid++; pid && l < *pos;
723 	     pid = (unsigned long)trace_pid_next(pid_list, (void *)pid, &l))
724 		;
725 	return (void *)pid;
726 }
727 
728 /**
729  * trace_pid_show - show the current pid in seq_file processing
730  * @m: The seq_file structure to write into
731  * @v: A void pointer of the pid (+1) value to display
732  *
733  * Can be directly used by seq_file operations to display the current
734  * pid value.
735  */
736 int trace_pid_show(struct seq_file *m, void *v)
737 {
738 	unsigned long pid = (unsigned long)v - 1;
739 
740 	seq_printf(m, "%lu\n", pid);
741 	return 0;
742 }
743 
744 /* 128 should be much more than enough */
745 #define PID_BUF_SIZE		127
746 
747 int trace_pid_write(struct trace_pid_list *filtered_pids,
748 		    struct trace_pid_list **new_pid_list,
749 		    const char __user *ubuf, size_t cnt)
750 {
751 	struct trace_pid_list *pid_list;
752 	struct trace_parser parser;
753 	unsigned long val;
754 	int nr_pids = 0;
755 	ssize_t read = 0;
756 	ssize_t ret;
757 	loff_t pos;
758 	pid_t pid;
759 
760 	if (trace_parser_get_init(&parser, PID_BUF_SIZE + 1))
761 		return -ENOMEM;
762 
763 	/*
764 	 * Always recreate a new array. The write is an all or nothing
765 	 * operation. Always create a new array when adding new pids by
766 	 * the user. If the operation fails, then the current list is
767 	 * not modified.
768 	 */
769 	pid_list = trace_pid_list_alloc();
770 	if (!pid_list) {
771 		trace_parser_put(&parser);
772 		return -ENOMEM;
773 	}
774 
775 	if (filtered_pids) {
776 		/* copy the current bits to the new max */
777 		ret = trace_pid_list_first(filtered_pids, &pid);
778 		while (!ret) {
779 			trace_pid_list_set(pid_list, pid);
780 			ret = trace_pid_list_next(filtered_pids, pid + 1, &pid);
781 			nr_pids++;
782 		}
783 	}
784 
785 	ret = 0;
786 	while (cnt > 0) {
787 
788 		pos = 0;
789 
790 		ret = trace_get_user(&parser, ubuf, cnt, &pos);
791 		if (ret < 0)
792 			break;
793 
794 		read += ret;
795 		ubuf += ret;
796 		cnt -= ret;
797 
798 		if (!trace_parser_loaded(&parser))
799 			break;
800 
801 		ret = -EINVAL;
802 		if (kstrtoul(parser.buffer, 0, &val))
803 			break;
804 
805 		pid = (pid_t)val;
806 
807 		if (trace_pid_list_set(pid_list, pid) < 0) {
808 			ret = -1;
809 			break;
810 		}
811 		nr_pids++;
812 
813 		trace_parser_clear(&parser);
814 		ret = 0;
815 	}
816 	trace_parser_put(&parser);
817 
818 	if (ret < 0) {
819 		trace_pid_list_free(pid_list);
820 		return ret;
821 	}
822 
823 	if (!nr_pids) {
824 		/* Cleared the list of pids */
825 		trace_pid_list_free(pid_list);
826 		pid_list = NULL;
827 	}
828 
829 	*new_pid_list = pid_list;
830 
831 	return read;
832 }
833 
834 static u64 buffer_ftrace_now(struct array_buffer *buf, int cpu)
835 {
836 	u64 ts;
837 
838 	/* Early boot up does not have a buffer yet */
839 	if (!buf->buffer)
840 		return trace_clock_local();
841 
842 	ts = ring_buffer_time_stamp(buf->buffer);
843 	ring_buffer_normalize_time_stamp(buf->buffer, cpu, &ts);
844 
845 	return ts;
846 }
847 
848 u64 ftrace_now(int cpu)
849 {
850 	return buffer_ftrace_now(&global_trace.array_buffer, cpu);
851 }
852 
853 /**
854  * tracing_is_enabled - Show if global_trace has been enabled
855  *
856  * Shows if the global trace has been enabled or not. It uses the
857  * mirror flag "buffer_disabled" to be used in fast paths such as for
858  * the irqsoff tracer. But it may be inaccurate due to races. If you
859  * need to know the accurate state, use tracing_is_on() which is a little
860  * slower, but accurate.
861  */
862 int tracing_is_enabled(void)
863 {
864 	/*
865 	 * For quick access (irqsoff uses this in fast path), just
866 	 * return the mirror variable of the state of the ring buffer.
867 	 * It's a little racy, but we don't really care.
868 	 */
869 	smp_rmb();
870 	return !global_trace.buffer_disabled;
871 }
872 
873 /*
874  * trace_buf_size is the size in bytes that is allocated
875  * for a buffer. Note, the number of bytes is always rounded
876  * to page size.
877  *
878  * This number is purposely set to a low number of 16384.
879  * If the dump on oops happens, it will be much appreciated
880  * to not have to wait for all that output. Anyway this can be
881  * boot time and run time configurable.
882  */
883 #define TRACE_BUF_SIZE_DEFAULT	1441792UL /* 16384 * 88 (sizeof(entry)) */
884 
885 static unsigned long		trace_buf_size = TRACE_BUF_SIZE_DEFAULT;
886 
887 /* trace_types holds a link list of available tracers. */
888 static struct tracer		*trace_types __read_mostly;
889 
890 /*
891  * trace_types_lock is used to protect the trace_types list.
892  */
893 DEFINE_MUTEX(trace_types_lock);
894 
895 /*
896  * serialize the access of the ring buffer
897  *
898  * ring buffer serializes readers, but it is low level protection.
899  * The validity of the events (which returns by ring_buffer_peek() ..etc)
900  * are not protected by ring buffer.
901  *
902  * The content of events may become garbage if we allow other process consumes
903  * these events concurrently:
904  *   A) the page of the consumed events may become a normal page
905  *      (not reader page) in ring buffer, and this page will be rewritten
906  *      by events producer.
907  *   B) The page of the consumed events may become a page for splice_read,
908  *      and this page will be returned to system.
909  *
910  * These primitives allow multi process access to different cpu ring buffer
911  * concurrently.
912  *
913  * These primitives don't distinguish read-only and read-consume access.
914  * Multi read-only access are also serialized.
915  */
916 
917 #ifdef CONFIG_SMP
918 static DECLARE_RWSEM(all_cpu_access_lock);
919 static DEFINE_PER_CPU(struct mutex, cpu_access_lock);
920 
921 static inline void trace_access_lock(int cpu)
922 {
923 	if (cpu == RING_BUFFER_ALL_CPUS) {
924 		/* gain it for accessing the whole ring buffer. */
925 		down_write(&all_cpu_access_lock);
926 	} else {
927 		/* gain it for accessing a cpu ring buffer. */
928 
929 		/* Firstly block other trace_access_lock(RING_BUFFER_ALL_CPUS). */
930 		down_read(&all_cpu_access_lock);
931 
932 		/* Secondly block other access to this @cpu ring buffer. */
933 		mutex_lock(&per_cpu(cpu_access_lock, cpu));
934 	}
935 }
936 
937 static inline void trace_access_unlock(int cpu)
938 {
939 	if (cpu == RING_BUFFER_ALL_CPUS) {
940 		up_write(&all_cpu_access_lock);
941 	} else {
942 		mutex_unlock(&per_cpu(cpu_access_lock, cpu));
943 		up_read(&all_cpu_access_lock);
944 	}
945 }
946 
947 static inline void trace_access_lock_init(void)
948 {
949 	int cpu;
950 
951 	for_each_possible_cpu(cpu)
952 		mutex_init(&per_cpu(cpu_access_lock, cpu));
953 }
954 
955 #else
956 
957 static DEFINE_MUTEX(access_lock);
958 
959 static inline void trace_access_lock(int cpu)
960 {
961 	(void)cpu;
962 	mutex_lock(&access_lock);
963 }
964 
965 static inline void trace_access_unlock(int cpu)
966 {
967 	(void)cpu;
968 	mutex_unlock(&access_lock);
969 }
970 
971 static inline void trace_access_lock_init(void)
972 {
973 }
974 
975 #endif
976 
977 #ifdef CONFIG_STACKTRACE
978 static void __ftrace_trace_stack(struct trace_array *tr,
979 				 struct trace_buffer *buffer,
980 				 unsigned int trace_ctx,
981 				 int skip, struct pt_regs *regs);
982 static inline void ftrace_trace_stack(struct trace_array *tr,
983 				      struct trace_buffer *buffer,
984 				      unsigned int trace_ctx,
985 				      int skip, struct pt_regs *regs);
986 
987 #else
988 static inline void __ftrace_trace_stack(struct trace_array *tr,
989 					struct trace_buffer *buffer,
990 					unsigned int trace_ctx,
991 					int skip, struct pt_regs *regs)
992 {
993 }
994 static inline void ftrace_trace_stack(struct trace_array *tr,
995 				      struct trace_buffer *buffer,
996 				      unsigned long trace_ctx,
997 				      int skip, struct pt_regs *regs)
998 {
999 }
1000 
1001 #endif
1002 
1003 static __always_inline void
1004 trace_event_setup(struct ring_buffer_event *event,
1005 		  int type, unsigned int trace_ctx)
1006 {
1007 	struct trace_entry *ent = ring_buffer_event_data(event);
1008 
1009 	tracing_generic_entry_update(ent, type, trace_ctx);
1010 }
1011 
1012 static __always_inline struct ring_buffer_event *
1013 __trace_buffer_lock_reserve(struct trace_buffer *buffer,
1014 			  int type,
1015 			  unsigned long len,
1016 			  unsigned int trace_ctx)
1017 {
1018 	struct ring_buffer_event *event;
1019 
1020 	event = ring_buffer_lock_reserve(buffer, len);
1021 	if (event != NULL)
1022 		trace_event_setup(event, type, trace_ctx);
1023 
1024 	return event;
1025 }
1026 
1027 void tracer_tracing_on(struct trace_array *tr)
1028 {
1029 	if (tr->array_buffer.buffer)
1030 		ring_buffer_record_on(tr->array_buffer.buffer);
1031 	/*
1032 	 * This flag is looked at when buffers haven't been allocated
1033 	 * yet, or by some tracers (like irqsoff), that just want to
1034 	 * know if the ring buffer has been disabled, but it can handle
1035 	 * races of where it gets disabled but we still do a record.
1036 	 * As the check is in the fast path of the tracers, it is more
1037 	 * important to be fast than accurate.
1038 	 */
1039 	tr->buffer_disabled = 0;
1040 	/* Make the flag seen by readers */
1041 	smp_wmb();
1042 }
1043 
1044 /**
1045  * tracing_on - enable tracing buffers
1046  *
1047  * This function enables tracing buffers that may have been
1048  * disabled with tracing_off.
1049  */
1050 void tracing_on(void)
1051 {
1052 	tracer_tracing_on(&global_trace);
1053 }
1054 EXPORT_SYMBOL_GPL(tracing_on);
1055 
1056 
1057 static __always_inline void
1058 __buffer_unlock_commit(struct trace_buffer *buffer, struct ring_buffer_event *event)
1059 {
1060 	__this_cpu_write(trace_taskinfo_save, true);
1061 
1062 	/* If this is the temp buffer, we need to commit fully */
1063 	if (this_cpu_read(trace_buffered_event) == event) {
1064 		/* Length is in event->array[0] */
1065 		ring_buffer_write(buffer, event->array[0], &event->array[1]);
1066 		/* Release the temp buffer */
1067 		this_cpu_dec(trace_buffered_event_cnt);
1068 		/* ring_buffer_unlock_commit() enables preemption */
1069 		preempt_enable_notrace();
1070 	} else
1071 		ring_buffer_unlock_commit(buffer);
1072 }
1073 
1074 int __trace_array_puts(struct trace_array *tr, unsigned long ip,
1075 		       const char *str, int size)
1076 {
1077 	struct ring_buffer_event *event;
1078 	struct trace_buffer *buffer;
1079 	struct print_entry *entry;
1080 	unsigned int trace_ctx;
1081 	int alloc;
1082 
1083 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1084 		return 0;
1085 
1086 	if (unlikely(tracing_selftest_running && tr == &global_trace))
1087 		return 0;
1088 
1089 	if (unlikely(tracing_disabled))
1090 		return 0;
1091 
1092 	alloc = sizeof(*entry) + size + 2; /* possible \n added */
1093 
1094 	trace_ctx = tracing_gen_ctx();
1095 	buffer = tr->array_buffer.buffer;
1096 	ring_buffer_nest_start(buffer);
1097 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, alloc,
1098 					    trace_ctx);
1099 	if (!event) {
1100 		size = 0;
1101 		goto out;
1102 	}
1103 
1104 	entry = ring_buffer_event_data(event);
1105 	entry->ip = ip;
1106 
1107 	memcpy(&entry->buf, str, size);
1108 
1109 	/* Add a newline if necessary */
1110 	if (entry->buf[size - 1] != '\n') {
1111 		entry->buf[size] = '\n';
1112 		entry->buf[size + 1] = '\0';
1113 	} else
1114 		entry->buf[size] = '\0';
1115 
1116 	__buffer_unlock_commit(buffer, event);
1117 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1118  out:
1119 	ring_buffer_nest_end(buffer);
1120 	return size;
1121 }
1122 EXPORT_SYMBOL_GPL(__trace_array_puts);
1123 
1124 /**
1125  * __trace_puts - write a constant string into the trace buffer.
1126  * @ip:	   The address of the caller
1127  * @str:   The constant string to write
1128  * @size:  The size of the string.
1129  */
1130 int __trace_puts(unsigned long ip, const char *str, int size)
1131 {
1132 	return __trace_array_puts(printk_trace, ip, str, size);
1133 }
1134 EXPORT_SYMBOL_GPL(__trace_puts);
1135 
1136 /**
1137  * __trace_bputs - write the pointer to a constant string into trace buffer
1138  * @ip:	   The address of the caller
1139  * @str:   The constant string to write to the buffer to
1140  */
1141 int __trace_bputs(unsigned long ip, const char *str)
1142 {
1143 	struct trace_array *tr = READ_ONCE(printk_trace);
1144 	struct ring_buffer_event *event;
1145 	struct trace_buffer *buffer;
1146 	struct bputs_entry *entry;
1147 	unsigned int trace_ctx;
1148 	int size = sizeof(struct bputs_entry);
1149 	int ret = 0;
1150 
1151 	if (!printk_binsafe(tr))
1152 		return __trace_puts(ip, str, strlen(str));
1153 
1154 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
1155 		return 0;
1156 
1157 	if (unlikely(tracing_selftest_running || tracing_disabled))
1158 		return 0;
1159 
1160 	trace_ctx = tracing_gen_ctx();
1161 	buffer = tr->array_buffer.buffer;
1162 
1163 	ring_buffer_nest_start(buffer);
1164 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPUTS, size,
1165 					    trace_ctx);
1166 	if (!event)
1167 		goto out;
1168 
1169 	entry = ring_buffer_event_data(event);
1170 	entry->ip			= ip;
1171 	entry->str			= str;
1172 
1173 	__buffer_unlock_commit(buffer, event);
1174 	ftrace_trace_stack(tr, buffer, trace_ctx, 4, NULL);
1175 
1176 	ret = 1;
1177  out:
1178 	ring_buffer_nest_end(buffer);
1179 	return ret;
1180 }
1181 EXPORT_SYMBOL_GPL(__trace_bputs);
1182 
1183 #ifdef CONFIG_TRACER_SNAPSHOT
1184 static void tracing_snapshot_instance_cond(struct trace_array *tr,
1185 					   void *cond_data)
1186 {
1187 	struct tracer *tracer = tr->current_trace;
1188 	unsigned long flags;
1189 
1190 	if (in_nmi()) {
1191 		trace_array_puts(tr, "*** SNAPSHOT CALLED FROM NMI CONTEXT ***\n");
1192 		trace_array_puts(tr, "*** snapshot is being ignored        ***\n");
1193 		return;
1194 	}
1195 
1196 	if (!tr->allocated_snapshot) {
1197 		trace_array_puts(tr, "*** SNAPSHOT NOT ALLOCATED ***\n");
1198 		trace_array_puts(tr, "*** stopping trace here!   ***\n");
1199 		tracer_tracing_off(tr);
1200 		return;
1201 	}
1202 
1203 	/* Note, snapshot can not be used when the tracer uses it */
1204 	if (tracer->use_max_tr) {
1205 		trace_array_puts(tr, "*** LATENCY TRACER ACTIVE ***\n");
1206 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1207 		return;
1208 	}
1209 
1210 	if (tr->mapped) {
1211 		trace_array_puts(tr, "*** BUFFER MEMORY MAPPED ***\n");
1212 		trace_array_puts(tr, "*** Can not use snapshot (sorry) ***\n");
1213 		return;
1214 	}
1215 
1216 	local_irq_save(flags);
1217 	update_max_tr(tr, current, smp_processor_id(), cond_data);
1218 	local_irq_restore(flags);
1219 }
1220 
1221 void tracing_snapshot_instance(struct trace_array *tr)
1222 {
1223 	tracing_snapshot_instance_cond(tr, NULL);
1224 }
1225 
1226 /**
1227  * tracing_snapshot - take a snapshot of the current buffer.
1228  *
1229  * This causes a swap between the snapshot buffer and the current live
1230  * tracing buffer. You can use this to take snapshots of the live
1231  * trace when some condition is triggered, but continue to trace.
1232  *
1233  * Note, make sure to allocate the snapshot with either
1234  * a tracing_snapshot_alloc(), or by doing it manually
1235  * with: echo 1 > /sys/kernel/tracing/snapshot
1236  *
1237  * If the snapshot buffer is not allocated, it will stop tracing.
1238  * Basically making a permanent snapshot.
1239  */
1240 void tracing_snapshot(void)
1241 {
1242 	struct trace_array *tr = &global_trace;
1243 
1244 	tracing_snapshot_instance(tr);
1245 }
1246 EXPORT_SYMBOL_GPL(tracing_snapshot);
1247 
1248 /**
1249  * tracing_snapshot_cond - conditionally take a snapshot of the current buffer.
1250  * @tr:		The tracing instance to snapshot
1251  * @cond_data:	The data to be tested conditionally, and possibly saved
1252  *
1253  * This is the same as tracing_snapshot() except that the snapshot is
1254  * conditional - the snapshot will only happen if the
1255  * cond_snapshot.update() implementation receiving the cond_data
1256  * returns true, which means that the trace array's cond_snapshot
1257  * update() operation used the cond_data to determine whether the
1258  * snapshot should be taken, and if it was, presumably saved it along
1259  * with the snapshot.
1260  */
1261 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1262 {
1263 	tracing_snapshot_instance_cond(tr, cond_data);
1264 }
1265 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1266 
1267 /**
1268  * tracing_cond_snapshot_data - get the user data associated with a snapshot
1269  * @tr:		The tracing instance
1270  *
1271  * When the user enables a conditional snapshot using
1272  * tracing_snapshot_cond_enable(), the user-defined cond_data is saved
1273  * with the snapshot.  This accessor is used to retrieve it.
1274  *
1275  * Should not be called from cond_snapshot.update(), since it takes
1276  * the tr->max_lock lock, which the code calling
1277  * cond_snapshot.update() has already done.
1278  *
1279  * Returns the cond_data associated with the trace array's snapshot.
1280  */
1281 void *tracing_cond_snapshot_data(struct trace_array *tr)
1282 {
1283 	void *cond_data = NULL;
1284 
1285 	local_irq_disable();
1286 	arch_spin_lock(&tr->max_lock);
1287 
1288 	if (tr->cond_snapshot)
1289 		cond_data = tr->cond_snapshot->cond_data;
1290 
1291 	arch_spin_unlock(&tr->max_lock);
1292 	local_irq_enable();
1293 
1294 	return cond_data;
1295 }
1296 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1297 
1298 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
1299 					struct array_buffer *size_buf, int cpu_id);
1300 static void set_buffer_entries(struct array_buffer *buf, unsigned long val);
1301 
1302 int tracing_alloc_snapshot_instance(struct trace_array *tr)
1303 {
1304 	int order;
1305 	int ret;
1306 
1307 	if (!tr->allocated_snapshot) {
1308 
1309 		/* Make the snapshot buffer have the same order as main buffer */
1310 		order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
1311 		ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
1312 		if (ret < 0)
1313 			return ret;
1314 
1315 		/* allocate spare buffer */
1316 		ret = resize_buffer_duplicate_size(&tr->max_buffer,
1317 				   &tr->array_buffer, RING_BUFFER_ALL_CPUS);
1318 		if (ret < 0)
1319 			return ret;
1320 
1321 		tr->allocated_snapshot = true;
1322 	}
1323 
1324 	return 0;
1325 }
1326 
1327 static void free_snapshot(struct trace_array *tr)
1328 {
1329 	/*
1330 	 * We don't free the ring buffer. instead, resize it because
1331 	 * The max_tr ring buffer has some state (e.g. ring->clock) and
1332 	 * we want preserve it.
1333 	 */
1334 	ring_buffer_subbuf_order_set(tr->max_buffer.buffer, 0);
1335 	ring_buffer_resize(tr->max_buffer.buffer, 1, RING_BUFFER_ALL_CPUS);
1336 	set_buffer_entries(&tr->max_buffer, 1);
1337 	tracing_reset_online_cpus(&tr->max_buffer);
1338 	tr->allocated_snapshot = false;
1339 }
1340 
1341 static int tracing_arm_snapshot_locked(struct trace_array *tr)
1342 {
1343 	int ret;
1344 
1345 	lockdep_assert_held(&trace_types_lock);
1346 
1347 	spin_lock(&tr->snapshot_trigger_lock);
1348 	if (tr->snapshot == UINT_MAX || tr->mapped) {
1349 		spin_unlock(&tr->snapshot_trigger_lock);
1350 		return -EBUSY;
1351 	}
1352 
1353 	tr->snapshot++;
1354 	spin_unlock(&tr->snapshot_trigger_lock);
1355 
1356 	ret = tracing_alloc_snapshot_instance(tr);
1357 	if (ret) {
1358 		spin_lock(&tr->snapshot_trigger_lock);
1359 		tr->snapshot--;
1360 		spin_unlock(&tr->snapshot_trigger_lock);
1361 	}
1362 
1363 	return ret;
1364 }
1365 
1366 int tracing_arm_snapshot(struct trace_array *tr)
1367 {
1368 	int ret;
1369 
1370 	mutex_lock(&trace_types_lock);
1371 	ret = tracing_arm_snapshot_locked(tr);
1372 	mutex_unlock(&trace_types_lock);
1373 
1374 	return ret;
1375 }
1376 
1377 void tracing_disarm_snapshot(struct trace_array *tr)
1378 {
1379 	spin_lock(&tr->snapshot_trigger_lock);
1380 	if (!WARN_ON(!tr->snapshot))
1381 		tr->snapshot--;
1382 	spin_unlock(&tr->snapshot_trigger_lock);
1383 }
1384 
1385 /**
1386  * tracing_alloc_snapshot - allocate snapshot buffer.
1387  *
1388  * This only allocates the snapshot buffer if it isn't already
1389  * allocated - it doesn't also take a snapshot.
1390  *
1391  * This is meant to be used in cases where the snapshot buffer needs
1392  * to be set up for events that can't sleep but need to be able to
1393  * trigger a snapshot.
1394  */
1395 int tracing_alloc_snapshot(void)
1396 {
1397 	struct trace_array *tr = &global_trace;
1398 	int ret;
1399 
1400 	ret = tracing_alloc_snapshot_instance(tr);
1401 	WARN_ON(ret < 0);
1402 
1403 	return ret;
1404 }
1405 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1406 
1407 /**
1408  * tracing_snapshot_alloc - allocate and take a snapshot of the current buffer.
1409  *
1410  * This is similar to tracing_snapshot(), but it will allocate the
1411  * snapshot buffer if it isn't already allocated. Use this only
1412  * where it is safe to sleep, as the allocation may sleep.
1413  *
1414  * This causes a swap between the snapshot buffer and the current live
1415  * tracing buffer. You can use this to take snapshots of the live
1416  * trace when some condition is triggered, but continue to trace.
1417  */
1418 void tracing_snapshot_alloc(void)
1419 {
1420 	int ret;
1421 
1422 	ret = tracing_alloc_snapshot();
1423 	if (ret < 0)
1424 		return;
1425 
1426 	tracing_snapshot();
1427 }
1428 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1429 
1430 /**
1431  * tracing_snapshot_cond_enable - enable conditional snapshot for an instance
1432  * @tr:		The tracing instance
1433  * @cond_data:	User data to associate with the snapshot
1434  * @update:	Implementation of the cond_snapshot update function
1435  *
1436  * Check whether the conditional snapshot for the given instance has
1437  * already been enabled, or if the current tracer is already using a
1438  * snapshot; if so, return -EBUSY, else create a cond_snapshot and
1439  * save the cond_data and update function inside.
1440  *
1441  * Returns 0 if successful, error otherwise.
1442  */
1443 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data,
1444 				 cond_update_fn_t update)
1445 {
1446 	struct cond_snapshot *cond_snapshot;
1447 	int ret = 0;
1448 
1449 	cond_snapshot = kzalloc(sizeof(*cond_snapshot), GFP_KERNEL);
1450 	if (!cond_snapshot)
1451 		return -ENOMEM;
1452 
1453 	cond_snapshot->cond_data = cond_data;
1454 	cond_snapshot->update = update;
1455 
1456 	mutex_lock(&trace_types_lock);
1457 
1458 	if (tr->current_trace->use_max_tr) {
1459 		ret = -EBUSY;
1460 		goto fail_unlock;
1461 	}
1462 
1463 	/*
1464 	 * The cond_snapshot can only change to NULL without the
1465 	 * trace_types_lock. We don't care if we race with it going
1466 	 * to NULL, but we want to make sure that it's not set to
1467 	 * something other than NULL when we get here, which we can
1468 	 * do safely with only holding the trace_types_lock and not
1469 	 * having to take the max_lock.
1470 	 */
1471 	if (tr->cond_snapshot) {
1472 		ret = -EBUSY;
1473 		goto fail_unlock;
1474 	}
1475 
1476 	ret = tracing_arm_snapshot_locked(tr);
1477 	if (ret)
1478 		goto fail_unlock;
1479 
1480 	local_irq_disable();
1481 	arch_spin_lock(&tr->max_lock);
1482 	tr->cond_snapshot = cond_snapshot;
1483 	arch_spin_unlock(&tr->max_lock);
1484 	local_irq_enable();
1485 
1486 	mutex_unlock(&trace_types_lock);
1487 
1488 	return ret;
1489 
1490  fail_unlock:
1491 	mutex_unlock(&trace_types_lock);
1492 	kfree(cond_snapshot);
1493 	return ret;
1494 }
1495 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1496 
1497 /**
1498  * tracing_snapshot_cond_disable - disable conditional snapshot for an instance
1499  * @tr:		The tracing instance
1500  *
1501  * Check whether the conditional snapshot for the given instance is
1502  * enabled; if so, free the cond_snapshot associated with it,
1503  * otherwise return -EINVAL.
1504  *
1505  * Returns 0 if successful, error otherwise.
1506  */
1507 int tracing_snapshot_cond_disable(struct trace_array *tr)
1508 {
1509 	int ret = 0;
1510 
1511 	local_irq_disable();
1512 	arch_spin_lock(&tr->max_lock);
1513 
1514 	if (!tr->cond_snapshot)
1515 		ret = -EINVAL;
1516 	else {
1517 		kfree(tr->cond_snapshot);
1518 		tr->cond_snapshot = NULL;
1519 	}
1520 
1521 	arch_spin_unlock(&tr->max_lock);
1522 	local_irq_enable();
1523 
1524 	tracing_disarm_snapshot(tr);
1525 
1526 	return ret;
1527 }
1528 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1529 #else
1530 void tracing_snapshot(void)
1531 {
1532 	WARN_ONCE(1, "Snapshot feature not enabled, but internal snapshot used");
1533 }
1534 EXPORT_SYMBOL_GPL(tracing_snapshot);
1535 void tracing_snapshot_cond(struct trace_array *tr, void *cond_data)
1536 {
1537 	WARN_ONCE(1, "Snapshot feature not enabled, but internal conditional snapshot used");
1538 }
1539 EXPORT_SYMBOL_GPL(tracing_snapshot_cond);
1540 int tracing_alloc_snapshot(void)
1541 {
1542 	WARN_ONCE(1, "Snapshot feature not enabled, but snapshot allocation used");
1543 	return -ENODEV;
1544 }
1545 EXPORT_SYMBOL_GPL(tracing_alloc_snapshot);
1546 void tracing_snapshot_alloc(void)
1547 {
1548 	/* Give warning */
1549 	tracing_snapshot();
1550 }
1551 EXPORT_SYMBOL_GPL(tracing_snapshot_alloc);
1552 void *tracing_cond_snapshot_data(struct trace_array *tr)
1553 {
1554 	return NULL;
1555 }
1556 EXPORT_SYMBOL_GPL(tracing_cond_snapshot_data);
1557 int tracing_snapshot_cond_enable(struct trace_array *tr, void *cond_data, cond_update_fn_t update)
1558 {
1559 	return -ENODEV;
1560 }
1561 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_enable);
1562 int tracing_snapshot_cond_disable(struct trace_array *tr)
1563 {
1564 	return false;
1565 }
1566 EXPORT_SYMBOL_GPL(tracing_snapshot_cond_disable);
1567 #define free_snapshot(tr)	do { } while (0)
1568 #define tracing_arm_snapshot_locked(tr) ({ -EBUSY; })
1569 #endif /* CONFIG_TRACER_SNAPSHOT */
1570 
1571 void tracer_tracing_off(struct trace_array *tr)
1572 {
1573 	if (tr->array_buffer.buffer)
1574 		ring_buffer_record_off(tr->array_buffer.buffer);
1575 	/*
1576 	 * This flag is looked at when buffers haven't been allocated
1577 	 * yet, or by some tracers (like irqsoff), that just want to
1578 	 * know if the ring buffer has been disabled, but it can handle
1579 	 * races of where it gets disabled but we still do a record.
1580 	 * As the check is in the fast path of the tracers, it is more
1581 	 * important to be fast than accurate.
1582 	 */
1583 	tr->buffer_disabled = 1;
1584 	/* Make the flag seen by readers */
1585 	smp_wmb();
1586 }
1587 
1588 /**
1589  * tracing_off - turn off tracing buffers
1590  *
1591  * This function stops the tracing buffers from recording data.
1592  * It does not disable any overhead the tracers themselves may
1593  * be causing. This function simply causes all recording to
1594  * the ring buffers to fail.
1595  */
1596 void tracing_off(void)
1597 {
1598 	tracer_tracing_off(&global_trace);
1599 }
1600 EXPORT_SYMBOL_GPL(tracing_off);
1601 
1602 void disable_trace_on_warning(void)
1603 {
1604 	if (__disable_trace_on_warning) {
1605 		trace_array_printk_buf(global_trace.array_buffer.buffer, _THIS_IP_,
1606 			"Disabling tracing due to warning\n");
1607 		tracing_off();
1608 	}
1609 }
1610 
1611 /**
1612  * tracer_tracing_is_on - show real state of ring buffer enabled
1613  * @tr : the trace array to know if ring buffer is enabled
1614  *
1615  * Shows real state of the ring buffer if it is enabled or not.
1616  */
1617 bool tracer_tracing_is_on(struct trace_array *tr)
1618 {
1619 	if (tr->array_buffer.buffer)
1620 		return ring_buffer_record_is_set_on(tr->array_buffer.buffer);
1621 	return !tr->buffer_disabled;
1622 }
1623 
1624 /**
1625  * tracing_is_on - show state of ring buffers enabled
1626  */
1627 int tracing_is_on(void)
1628 {
1629 	return tracer_tracing_is_on(&global_trace);
1630 }
1631 EXPORT_SYMBOL_GPL(tracing_is_on);
1632 
1633 static int __init set_buf_size(char *str)
1634 {
1635 	unsigned long buf_size;
1636 
1637 	if (!str)
1638 		return 0;
1639 	buf_size = memparse(str, &str);
1640 	/*
1641 	 * nr_entries can not be zero and the startup
1642 	 * tests require some buffer space. Therefore
1643 	 * ensure we have at least 4096 bytes of buffer.
1644 	 */
1645 	trace_buf_size = max(4096UL, buf_size);
1646 	return 1;
1647 }
1648 __setup("trace_buf_size=", set_buf_size);
1649 
1650 static int __init set_tracing_thresh(char *str)
1651 {
1652 	unsigned long threshold;
1653 	int ret;
1654 
1655 	if (!str)
1656 		return 0;
1657 	ret = kstrtoul(str, 0, &threshold);
1658 	if (ret < 0)
1659 		return 0;
1660 	tracing_thresh = threshold * 1000;
1661 	return 1;
1662 }
1663 __setup("tracing_thresh=", set_tracing_thresh);
1664 
1665 unsigned long nsecs_to_usecs(unsigned long nsecs)
1666 {
1667 	return nsecs / 1000;
1668 }
1669 
1670 /*
1671  * TRACE_FLAGS is defined as a tuple matching bit masks with strings.
1672  * It uses C(a, b) where 'a' is the eval (enum) name and 'b' is the string that
1673  * matches it. By defining "C(a, b) b", TRACE_FLAGS becomes a list
1674  * of strings in the order that the evals (enum) were defined.
1675  */
1676 #undef C
1677 #define C(a, b) b
1678 
1679 /* These must match the bit positions in trace_iterator_flags */
1680 static const char *trace_options[] = {
1681 	TRACE_FLAGS
1682 	NULL
1683 };
1684 
1685 static struct {
1686 	u64 (*func)(void);
1687 	const char *name;
1688 	int in_ns;		/* is this clock in nanoseconds? */
1689 } trace_clocks[] = {
1690 	{ trace_clock_local,		"local",	1 },
1691 	{ trace_clock_global,		"global",	1 },
1692 	{ trace_clock_counter,		"counter",	0 },
1693 	{ trace_clock_jiffies,		"uptime",	0 },
1694 	{ trace_clock,			"perf",		1 },
1695 	{ ktime_get_mono_fast_ns,	"mono",		1 },
1696 	{ ktime_get_raw_fast_ns,	"mono_raw",	1 },
1697 	{ ktime_get_boot_fast_ns,	"boot",		1 },
1698 	{ ktime_get_tai_fast_ns,	"tai",		1 },
1699 	ARCH_TRACE_CLOCKS
1700 };
1701 
1702 bool trace_clock_in_ns(struct trace_array *tr)
1703 {
1704 	if (trace_clocks[tr->clock_id].in_ns)
1705 		return true;
1706 
1707 	return false;
1708 }
1709 
1710 /*
1711  * trace_parser_get_init - gets the buffer for trace parser
1712  */
1713 int trace_parser_get_init(struct trace_parser *parser, int size)
1714 {
1715 	memset(parser, 0, sizeof(*parser));
1716 
1717 	parser->buffer = kmalloc(size, GFP_KERNEL);
1718 	if (!parser->buffer)
1719 		return 1;
1720 
1721 	parser->size = size;
1722 	return 0;
1723 }
1724 
1725 /*
1726  * trace_parser_put - frees the buffer for trace parser
1727  */
1728 void trace_parser_put(struct trace_parser *parser)
1729 {
1730 	kfree(parser->buffer);
1731 	parser->buffer = NULL;
1732 }
1733 
1734 /*
1735  * trace_get_user - reads the user input string separated by  space
1736  * (matched by isspace(ch))
1737  *
1738  * For each string found the 'struct trace_parser' is updated,
1739  * and the function returns.
1740  *
1741  * Returns number of bytes read.
1742  *
1743  * See kernel/trace/trace.h for 'struct trace_parser' details.
1744  */
1745 int trace_get_user(struct trace_parser *parser, const char __user *ubuf,
1746 	size_t cnt, loff_t *ppos)
1747 {
1748 	char ch;
1749 	size_t read = 0;
1750 	ssize_t ret;
1751 
1752 	if (!*ppos)
1753 		trace_parser_clear(parser);
1754 
1755 	ret = get_user(ch, ubuf++);
1756 	if (ret)
1757 		goto out;
1758 
1759 	read++;
1760 	cnt--;
1761 
1762 	/*
1763 	 * The parser is not finished with the last write,
1764 	 * continue reading the user input without skipping spaces.
1765 	 */
1766 	if (!parser->cont) {
1767 		/* skip white space */
1768 		while (cnt && isspace(ch)) {
1769 			ret = get_user(ch, ubuf++);
1770 			if (ret)
1771 				goto out;
1772 			read++;
1773 			cnt--;
1774 		}
1775 
1776 		parser->idx = 0;
1777 
1778 		/* only spaces were written */
1779 		if (isspace(ch) || !ch) {
1780 			*ppos += read;
1781 			ret = read;
1782 			goto out;
1783 		}
1784 	}
1785 
1786 	/* read the non-space input */
1787 	while (cnt && !isspace(ch) && ch) {
1788 		if (parser->idx < parser->size - 1)
1789 			parser->buffer[parser->idx++] = ch;
1790 		else {
1791 			ret = -EINVAL;
1792 			goto out;
1793 		}
1794 		ret = get_user(ch, ubuf++);
1795 		if (ret)
1796 			goto out;
1797 		read++;
1798 		cnt--;
1799 	}
1800 
1801 	/* We either got finished input or we have to wait for another call. */
1802 	if (isspace(ch) || !ch) {
1803 		parser->buffer[parser->idx] = 0;
1804 		parser->cont = false;
1805 	} else if (parser->idx < parser->size - 1) {
1806 		parser->cont = true;
1807 		parser->buffer[parser->idx++] = ch;
1808 		/* Make sure the parsed string always terminates with '\0'. */
1809 		parser->buffer[parser->idx] = 0;
1810 	} else {
1811 		ret = -EINVAL;
1812 		goto out;
1813 	}
1814 
1815 	*ppos += read;
1816 	ret = read;
1817 
1818 out:
1819 	return ret;
1820 }
1821 
1822 /* TODO add a seq_buf_to_buffer() */
1823 static ssize_t trace_seq_to_buffer(struct trace_seq *s, void *buf, size_t cnt)
1824 {
1825 	int len;
1826 
1827 	if (trace_seq_used(s) <= s->readpos)
1828 		return -EBUSY;
1829 
1830 	len = trace_seq_used(s) - s->readpos;
1831 	if (cnt > len)
1832 		cnt = len;
1833 	memcpy(buf, s->buffer + s->readpos, cnt);
1834 
1835 	s->readpos += cnt;
1836 	return cnt;
1837 }
1838 
1839 unsigned long __read_mostly	tracing_thresh;
1840 
1841 #ifdef CONFIG_TRACER_MAX_TRACE
1842 static const struct file_operations tracing_max_lat_fops;
1843 
1844 #ifdef LATENCY_FS_NOTIFY
1845 
1846 static struct workqueue_struct *fsnotify_wq;
1847 
1848 static void latency_fsnotify_workfn(struct work_struct *work)
1849 {
1850 	struct trace_array *tr = container_of(work, struct trace_array,
1851 					      fsnotify_work);
1852 	fsnotify_inode(tr->d_max_latency->d_inode, FS_MODIFY);
1853 }
1854 
1855 static void latency_fsnotify_workfn_irq(struct irq_work *iwork)
1856 {
1857 	struct trace_array *tr = container_of(iwork, struct trace_array,
1858 					      fsnotify_irqwork);
1859 	queue_work(fsnotify_wq, &tr->fsnotify_work);
1860 }
1861 
1862 static void trace_create_maxlat_file(struct trace_array *tr,
1863 				     struct dentry *d_tracer)
1864 {
1865 	INIT_WORK(&tr->fsnotify_work, latency_fsnotify_workfn);
1866 	init_irq_work(&tr->fsnotify_irqwork, latency_fsnotify_workfn_irq);
1867 	tr->d_max_latency = trace_create_file("tracing_max_latency",
1868 					      TRACE_MODE_WRITE,
1869 					      d_tracer, tr,
1870 					      &tracing_max_lat_fops);
1871 }
1872 
1873 __init static int latency_fsnotify_init(void)
1874 {
1875 	fsnotify_wq = alloc_workqueue("tr_max_lat_wq",
1876 				      WQ_UNBOUND | WQ_HIGHPRI, 0);
1877 	if (!fsnotify_wq) {
1878 		pr_err("Unable to allocate tr_max_lat_wq\n");
1879 		return -ENOMEM;
1880 	}
1881 	return 0;
1882 }
1883 
1884 late_initcall_sync(latency_fsnotify_init);
1885 
1886 void latency_fsnotify(struct trace_array *tr)
1887 {
1888 	if (!fsnotify_wq)
1889 		return;
1890 	/*
1891 	 * We cannot call queue_work(&tr->fsnotify_work) from here because it's
1892 	 * possible that we are called from __schedule() or do_idle(), which
1893 	 * could cause a deadlock.
1894 	 */
1895 	irq_work_queue(&tr->fsnotify_irqwork);
1896 }
1897 
1898 #else /* !LATENCY_FS_NOTIFY */
1899 
1900 #define trace_create_maxlat_file(tr, d_tracer)				\
1901 	trace_create_file("tracing_max_latency", TRACE_MODE_WRITE,	\
1902 			  d_tracer, tr, &tracing_max_lat_fops)
1903 
1904 #endif
1905 
1906 /*
1907  * Copy the new maximum trace into the separate maximum-trace
1908  * structure. (this way the maximum trace is permanently saved,
1909  * for later retrieval via /sys/kernel/tracing/tracing_max_latency)
1910  */
1911 static void
1912 __update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu)
1913 {
1914 	struct array_buffer *trace_buf = &tr->array_buffer;
1915 	struct array_buffer *max_buf = &tr->max_buffer;
1916 	struct trace_array_cpu *data = per_cpu_ptr(trace_buf->data, cpu);
1917 	struct trace_array_cpu *max_data = per_cpu_ptr(max_buf->data, cpu);
1918 
1919 	max_buf->cpu = cpu;
1920 	max_buf->time_start = data->preempt_timestamp;
1921 
1922 	max_data->saved_latency = tr->max_latency;
1923 	max_data->critical_start = data->critical_start;
1924 	max_data->critical_end = data->critical_end;
1925 
1926 	strscpy(max_data->comm, tsk->comm);
1927 	max_data->pid = tsk->pid;
1928 	/*
1929 	 * If tsk == current, then use current_uid(), as that does not use
1930 	 * RCU. The irq tracer can be called out of RCU scope.
1931 	 */
1932 	if (tsk == current)
1933 		max_data->uid = current_uid();
1934 	else
1935 		max_data->uid = task_uid(tsk);
1936 
1937 	max_data->nice = tsk->static_prio - 20 - MAX_RT_PRIO;
1938 	max_data->policy = tsk->policy;
1939 	max_data->rt_priority = tsk->rt_priority;
1940 
1941 	/* record this tasks comm */
1942 	tracing_record_cmdline(tsk);
1943 	latency_fsnotify(tr);
1944 }
1945 
1946 /**
1947  * update_max_tr - snapshot all trace buffers from global_trace to max_tr
1948  * @tr: tracer
1949  * @tsk: the task with the latency
1950  * @cpu: The cpu that initiated the trace.
1951  * @cond_data: User data associated with a conditional snapshot
1952  *
1953  * Flip the buffers between the @tr and the max_tr and record information
1954  * about which task was the cause of this latency.
1955  */
1956 void
1957 update_max_tr(struct trace_array *tr, struct task_struct *tsk, int cpu,
1958 	      void *cond_data)
1959 {
1960 	if (tr->stop_count)
1961 		return;
1962 
1963 	WARN_ON_ONCE(!irqs_disabled());
1964 
1965 	if (!tr->allocated_snapshot) {
1966 		/* Only the nop tracer should hit this when disabling */
1967 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
1968 		return;
1969 	}
1970 
1971 	arch_spin_lock(&tr->max_lock);
1972 
1973 	/* Inherit the recordable setting from array_buffer */
1974 	if (ring_buffer_record_is_set_on(tr->array_buffer.buffer))
1975 		ring_buffer_record_on(tr->max_buffer.buffer);
1976 	else
1977 		ring_buffer_record_off(tr->max_buffer.buffer);
1978 
1979 #ifdef CONFIG_TRACER_SNAPSHOT
1980 	if (tr->cond_snapshot && !tr->cond_snapshot->update(tr, cond_data)) {
1981 		arch_spin_unlock(&tr->max_lock);
1982 		return;
1983 	}
1984 #endif
1985 	swap(tr->array_buffer.buffer, tr->max_buffer.buffer);
1986 
1987 	__update_max_tr(tr, tsk, cpu);
1988 
1989 	arch_spin_unlock(&tr->max_lock);
1990 
1991 	/* Any waiters on the old snapshot buffer need to wake up */
1992 	ring_buffer_wake_waiters(tr->array_buffer.buffer, RING_BUFFER_ALL_CPUS);
1993 }
1994 
1995 /**
1996  * update_max_tr_single - only copy one trace over, and reset the rest
1997  * @tr: tracer
1998  * @tsk: task with the latency
1999  * @cpu: the cpu of the buffer to copy.
2000  *
2001  * Flip the trace of a single CPU buffer between the @tr and the max_tr.
2002  */
2003 void
2004 update_max_tr_single(struct trace_array *tr, struct task_struct *tsk, int cpu)
2005 {
2006 	int ret;
2007 
2008 	if (tr->stop_count)
2009 		return;
2010 
2011 	WARN_ON_ONCE(!irqs_disabled());
2012 	if (!tr->allocated_snapshot) {
2013 		/* Only the nop tracer should hit this when disabling */
2014 		WARN_ON_ONCE(tr->current_trace != &nop_trace);
2015 		return;
2016 	}
2017 
2018 	arch_spin_lock(&tr->max_lock);
2019 
2020 	ret = ring_buffer_swap_cpu(tr->max_buffer.buffer, tr->array_buffer.buffer, cpu);
2021 
2022 	if (ret == -EBUSY) {
2023 		/*
2024 		 * We failed to swap the buffer due to a commit taking
2025 		 * place on this CPU. We fail to record, but we reset
2026 		 * the max trace buffer (no one writes directly to it)
2027 		 * and flag that it failed.
2028 		 * Another reason is resize is in progress.
2029 		 */
2030 		trace_array_printk_buf(tr->max_buffer.buffer, _THIS_IP_,
2031 			"Failed to swap buffers due to commit or resize in progress\n");
2032 	}
2033 
2034 	WARN_ON_ONCE(ret && ret != -EAGAIN && ret != -EBUSY);
2035 
2036 	__update_max_tr(tr, tsk, cpu);
2037 	arch_spin_unlock(&tr->max_lock);
2038 }
2039 
2040 #endif /* CONFIG_TRACER_MAX_TRACE */
2041 
2042 struct pipe_wait {
2043 	struct trace_iterator		*iter;
2044 	int				wait_index;
2045 };
2046 
2047 static bool wait_pipe_cond(void *data)
2048 {
2049 	struct pipe_wait *pwait = data;
2050 	struct trace_iterator *iter = pwait->iter;
2051 
2052 	if (atomic_read_acquire(&iter->wait_index) != pwait->wait_index)
2053 		return true;
2054 
2055 	return iter->closed;
2056 }
2057 
2058 static int wait_on_pipe(struct trace_iterator *iter, int full)
2059 {
2060 	struct pipe_wait pwait;
2061 	int ret;
2062 
2063 	/* Iterators are static, they should be filled or empty */
2064 	if (trace_buffer_iter(iter, iter->cpu_file))
2065 		return 0;
2066 
2067 	pwait.wait_index = atomic_read_acquire(&iter->wait_index);
2068 	pwait.iter = iter;
2069 
2070 	ret = ring_buffer_wait(iter->array_buffer->buffer, iter->cpu_file, full,
2071 			       wait_pipe_cond, &pwait);
2072 
2073 #ifdef CONFIG_TRACER_MAX_TRACE
2074 	/*
2075 	 * Make sure this is still the snapshot buffer, as if a snapshot were
2076 	 * to happen, this would now be the main buffer.
2077 	 */
2078 	if (iter->snapshot)
2079 		iter->array_buffer = &iter->tr->max_buffer;
2080 #endif
2081 	return ret;
2082 }
2083 
2084 #ifdef CONFIG_FTRACE_STARTUP_TEST
2085 static bool selftests_can_run;
2086 
2087 struct trace_selftests {
2088 	struct list_head		list;
2089 	struct tracer			*type;
2090 };
2091 
2092 static LIST_HEAD(postponed_selftests);
2093 
2094 static int save_selftest(struct tracer *type)
2095 {
2096 	struct trace_selftests *selftest;
2097 
2098 	selftest = kmalloc(sizeof(*selftest), GFP_KERNEL);
2099 	if (!selftest)
2100 		return -ENOMEM;
2101 
2102 	selftest->type = type;
2103 	list_add(&selftest->list, &postponed_selftests);
2104 	return 0;
2105 }
2106 
2107 static int run_tracer_selftest(struct tracer *type)
2108 {
2109 	struct trace_array *tr = &global_trace;
2110 	struct tracer *saved_tracer = tr->current_trace;
2111 	int ret;
2112 
2113 	if (!type->selftest || tracing_selftest_disabled)
2114 		return 0;
2115 
2116 	/*
2117 	 * If a tracer registers early in boot up (before scheduling is
2118 	 * initialized and such), then do not run its selftests yet.
2119 	 * Instead, run it a little later in the boot process.
2120 	 */
2121 	if (!selftests_can_run)
2122 		return save_selftest(type);
2123 
2124 	if (!tracing_is_on()) {
2125 		pr_warn("Selftest for tracer %s skipped due to tracing disabled\n",
2126 			type->name);
2127 		return 0;
2128 	}
2129 
2130 	/*
2131 	 * Run a selftest on this tracer.
2132 	 * Here we reset the trace buffer, and set the current
2133 	 * tracer to be this tracer. The tracer can then run some
2134 	 * internal tracing to verify that everything is in order.
2135 	 * If we fail, we do not register this tracer.
2136 	 */
2137 	tracing_reset_online_cpus(&tr->array_buffer);
2138 
2139 	tr->current_trace = type;
2140 
2141 #ifdef CONFIG_TRACER_MAX_TRACE
2142 	if (type->use_max_tr) {
2143 		/* If we expanded the buffers, make sure the max is expanded too */
2144 		if (tr->ring_buffer_expanded)
2145 			ring_buffer_resize(tr->max_buffer.buffer, trace_buf_size,
2146 					   RING_BUFFER_ALL_CPUS);
2147 		tr->allocated_snapshot = true;
2148 	}
2149 #endif
2150 
2151 	/* the test is responsible for initializing and enabling */
2152 	pr_info("Testing tracer %s: ", type->name);
2153 	ret = type->selftest(type, tr);
2154 	/* the test is responsible for resetting too */
2155 	tr->current_trace = saved_tracer;
2156 	if (ret) {
2157 		printk(KERN_CONT "FAILED!\n");
2158 		/* Add the warning after printing 'FAILED' */
2159 		WARN_ON(1);
2160 		return -1;
2161 	}
2162 	/* Only reset on passing, to avoid touching corrupted buffers */
2163 	tracing_reset_online_cpus(&tr->array_buffer);
2164 
2165 #ifdef CONFIG_TRACER_MAX_TRACE
2166 	if (type->use_max_tr) {
2167 		tr->allocated_snapshot = false;
2168 
2169 		/* Shrink the max buffer again */
2170 		if (tr->ring_buffer_expanded)
2171 			ring_buffer_resize(tr->max_buffer.buffer, 1,
2172 					   RING_BUFFER_ALL_CPUS);
2173 	}
2174 #endif
2175 
2176 	printk(KERN_CONT "PASSED\n");
2177 	return 0;
2178 }
2179 
2180 static int do_run_tracer_selftest(struct tracer *type)
2181 {
2182 	int ret;
2183 
2184 	/*
2185 	 * Tests can take a long time, especially if they are run one after the
2186 	 * other, as does happen during bootup when all the tracers are
2187 	 * registered. This could cause the soft lockup watchdog to trigger.
2188 	 */
2189 	cond_resched();
2190 
2191 	tracing_selftest_running = true;
2192 	ret = run_tracer_selftest(type);
2193 	tracing_selftest_running = false;
2194 
2195 	return ret;
2196 }
2197 
2198 static __init int init_trace_selftests(void)
2199 {
2200 	struct trace_selftests *p, *n;
2201 	struct tracer *t, **last;
2202 	int ret;
2203 
2204 	selftests_can_run = true;
2205 
2206 	mutex_lock(&trace_types_lock);
2207 
2208 	if (list_empty(&postponed_selftests))
2209 		goto out;
2210 
2211 	pr_info("Running postponed tracer tests:\n");
2212 
2213 	tracing_selftest_running = true;
2214 	list_for_each_entry_safe(p, n, &postponed_selftests, list) {
2215 		/* This loop can take minutes when sanitizers are enabled, so
2216 		 * lets make sure we allow RCU processing.
2217 		 */
2218 		cond_resched();
2219 		ret = run_tracer_selftest(p->type);
2220 		/* If the test fails, then warn and remove from available_tracers */
2221 		if (ret < 0) {
2222 			WARN(1, "tracer: %s failed selftest, disabling\n",
2223 			     p->type->name);
2224 			last = &trace_types;
2225 			for (t = trace_types; t; t = t->next) {
2226 				if (t == p->type) {
2227 					*last = t->next;
2228 					break;
2229 				}
2230 				last = &t->next;
2231 			}
2232 		}
2233 		list_del(&p->list);
2234 		kfree(p);
2235 	}
2236 	tracing_selftest_running = false;
2237 
2238  out:
2239 	mutex_unlock(&trace_types_lock);
2240 
2241 	return 0;
2242 }
2243 core_initcall(init_trace_selftests);
2244 #else
2245 static inline int do_run_tracer_selftest(struct tracer *type)
2246 {
2247 	return 0;
2248 }
2249 #endif /* CONFIG_FTRACE_STARTUP_TEST */
2250 
2251 static void add_tracer_options(struct trace_array *tr, struct tracer *t);
2252 
2253 static void __init apply_trace_boot_options(void);
2254 
2255 /**
2256  * register_tracer - register a tracer with the ftrace system.
2257  * @type: the plugin for the tracer
2258  *
2259  * Register a new plugin tracer.
2260  */
2261 int __init register_tracer(struct tracer *type)
2262 {
2263 	struct tracer *t;
2264 	int ret = 0;
2265 
2266 	if (!type->name) {
2267 		pr_info("Tracer must have a name\n");
2268 		return -1;
2269 	}
2270 
2271 	if (strlen(type->name) >= MAX_TRACER_SIZE) {
2272 		pr_info("Tracer has a name longer than %d\n", MAX_TRACER_SIZE);
2273 		return -1;
2274 	}
2275 
2276 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
2277 		pr_warn("Can not register tracer %s due to lockdown\n",
2278 			   type->name);
2279 		return -EPERM;
2280 	}
2281 
2282 	mutex_lock(&trace_types_lock);
2283 
2284 	for (t = trace_types; t; t = t->next) {
2285 		if (strcmp(type->name, t->name) == 0) {
2286 			/* already found */
2287 			pr_info("Tracer %s already registered\n",
2288 				type->name);
2289 			ret = -1;
2290 			goto out;
2291 		}
2292 	}
2293 
2294 	if (!type->set_flag)
2295 		type->set_flag = &dummy_set_flag;
2296 	if (!type->flags) {
2297 		/*allocate a dummy tracer_flags*/
2298 		type->flags = kmalloc(sizeof(*type->flags), GFP_KERNEL);
2299 		if (!type->flags) {
2300 			ret = -ENOMEM;
2301 			goto out;
2302 		}
2303 		type->flags->val = 0;
2304 		type->flags->opts = dummy_tracer_opt;
2305 	} else
2306 		if (!type->flags->opts)
2307 			type->flags->opts = dummy_tracer_opt;
2308 
2309 	/* store the tracer for __set_tracer_option */
2310 	type->flags->trace = type;
2311 
2312 	ret = do_run_tracer_selftest(type);
2313 	if (ret < 0)
2314 		goto out;
2315 
2316 	type->next = trace_types;
2317 	trace_types = type;
2318 	add_tracer_options(&global_trace, type);
2319 
2320  out:
2321 	mutex_unlock(&trace_types_lock);
2322 
2323 	if (ret || !default_bootup_tracer)
2324 		goto out_unlock;
2325 
2326 	if (strncmp(default_bootup_tracer, type->name, MAX_TRACER_SIZE))
2327 		goto out_unlock;
2328 
2329 	printk(KERN_INFO "Starting tracer '%s'\n", type->name);
2330 	/* Do we want this tracer to start on bootup? */
2331 	tracing_set_tracer(&global_trace, type->name);
2332 	default_bootup_tracer = NULL;
2333 
2334 	apply_trace_boot_options();
2335 
2336 	/* disable other selftests, since this will break it. */
2337 	disable_tracing_selftest("running a tracer");
2338 
2339  out_unlock:
2340 	return ret;
2341 }
2342 
2343 static void tracing_reset_cpu(struct array_buffer *buf, int cpu)
2344 {
2345 	struct trace_buffer *buffer = buf->buffer;
2346 
2347 	if (!buffer)
2348 		return;
2349 
2350 	ring_buffer_record_disable(buffer);
2351 
2352 	/* Make sure all commits have finished */
2353 	synchronize_rcu();
2354 	ring_buffer_reset_cpu(buffer, cpu);
2355 
2356 	ring_buffer_record_enable(buffer);
2357 }
2358 
2359 void tracing_reset_online_cpus(struct array_buffer *buf)
2360 {
2361 	struct trace_buffer *buffer = buf->buffer;
2362 
2363 	if (!buffer)
2364 		return;
2365 
2366 	ring_buffer_record_disable(buffer);
2367 
2368 	/* Make sure all commits have finished */
2369 	synchronize_rcu();
2370 
2371 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2372 
2373 	ring_buffer_reset_online_cpus(buffer);
2374 
2375 	ring_buffer_record_enable(buffer);
2376 }
2377 
2378 static void tracing_reset_all_cpus(struct array_buffer *buf)
2379 {
2380 	struct trace_buffer *buffer = buf->buffer;
2381 
2382 	if (!buffer)
2383 		return;
2384 
2385 	ring_buffer_record_disable(buffer);
2386 
2387 	/* Make sure all commits have finished */
2388 	synchronize_rcu();
2389 
2390 	buf->time_start = buffer_ftrace_now(buf, buf->cpu);
2391 
2392 	ring_buffer_reset(buffer);
2393 
2394 	ring_buffer_record_enable(buffer);
2395 }
2396 
2397 /* Must have trace_types_lock held */
2398 void tracing_reset_all_online_cpus_unlocked(void)
2399 {
2400 	struct trace_array *tr;
2401 
2402 	lockdep_assert_held(&trace_types_lock);
2403 
2404 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
2405 		if (!tr->clear_trace)
2406 			continue;
2407 		tr->clear_trace = false;
2408 		tracing_reset_online_cpus(&tr->array_buffer);
2409 #ifdef CONFIG_TRACER_MAX_TRACE
2410 		tracing_reset_online_cpus(&tr->max_buffer);
2411 #endif
2412 	}
2413 }
2414 
2415 void tracing_reset_all_online_cpus(void)
2416 {
2417 	mutex_lock(&trace_types_lock);
2418 	tracing_reset_all_online_cpus_unlocked();
2419 	mutex_unlock(&trace_types_lock);
2420 }
2421 
2422 int is_tracing_stopped(void)
2423 {
2424 	return global_trace.stop_count;
2425 }
2426 
2427 static void tracing_start_tr(struct trace_array *tr)
2428 {
2429 	struct trace_buffer *buffer;
2430 	unsigned long flags;
2431 
2432 	if (tracing_disabled)
2433 		return;
2434 
2435 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2436 	if (--tr->stop_count) {
2437 		if (WARN_ON_ONCE(tr->stop_count < 0)) {
2438 			/* Someone screwed up their debugging */
2439 			tr->stop_count = 0;
2440 		}
2441 		goto out;
2442 	}
2443 
2444 	/* Prevent the buffers from switching */
2445 	arch_spin_lock(&tr->max_lock);
2446 
2447 	buffer = tr->array_buffer.buffer;
2448 	if (buffer)
2449 		ring_buffer_record_enable(buffer);
2450 
2451 #ifdef CONFIG_TRACER_MAX_TRACE
2452 	buffer = tr->max_buffer.buffer;
2453 	if (buffer)
2454 		ring_buffer_record_enable(buffer);
2455 #endif
2456 
2457 	arch_spin_unlock(&tr->max_lock);
2458 
2459  out:
2460 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2461 }
2462 
2463 /**
2464  * tracing_start - quick start of the tracer
2465  *
2466  * If tracing is enabled but was stopped by tracing_stop,
2467  * this will start the tracer back up.
2468  */
2469 void tracing_start(void)
2470 
2471 {
2472 	return tracing_start_tr(&global_trace);
2473 }
2474 
2475 static void tracing_stop_tr(struct trace_array *tr)
2476 {
2477 	struct trace_buffer *buffer;
2478 	unsigned long flags;
2479 
2480 	raw_spin_lock_irqsave(&tr->start_lock, flags);
2481 	if (tr->stop_count++)
2482 		goto out;
2483 
2484 	/* Prevent the buffers from switching */
2485 	arch_spin_lock(&tr->max_lock);
2486 
2487 	buffer = tr->array_buffer.buffer;
2488 	if (buffer)
2489 		ring_buffer_record_disable(buffer);
2490 
2491 #ifdef CONFIG_TRACER_MAX_TRACE
2492 	buffer = tr->max_buffer.buffer;
2493 	if (buffer)
2494 		ring_buffer_record_disable(buffer);
2495 #endif
2496 
2497 	arch_spin_unlock(&tr->max_lock);
2498 
2499  out:
2500 	raw_spin_unlock_irqrestore(&tr->start_lock, flags);
2501 }
2502 
2503 /**
2504  * tracing_stop - quick stop of the tracer
2505  *
2506  * Light weight way to stop tracing. Use in conjunction with
2507  * tracing_start.
2508  */
2509 void tracing_stop(void)
2510 {
2511 	return tracing_stop_tr(&global_trace);
2512 }
2513 
2514 /*
2515  * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq
2516  * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function
2517  * simplifies those functions and keeps them in sync.
2518  */
2519 enum print_line_t trace_handle_return(struct trace_seq *s)
2520 {
2521 	return trace_seq_has_overflowed(s) ?
2522 		TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED;
2523 }
2524 EXPORT_SYMBOL_GPL(trace_handle_return);
2525 
2526 static unsigned short migration_disable_value(void)
2527 {
2528 #if defined(CONFIG_SMP)
2529 	return current->migration_disabled;
2530 #else
2531 	return 0;
2532 #endif
2533 }
2534 
2535 unsigned int tracing_gen_ctx_irq_test(unsigned int irqs_status)
2536 {
2537 	unsigned int trace_flags = irqs_status;
2538 	unsigned int pc;
2539 
2540 	pc = preempt_count();
2541 
2542 	if (pc & NMI_MASK)
2543 		trace_flags |= TRACE_FLAG_NMI;
2544 	if (pc & HARDIRQ_MASK)
2545 		trace_flags |= TRACE_FLAG_HARDIRQ;
2546 	if (in_serving_softirq())
2547 		trace_flags |= TRACE_FLAG_SOFTIRQ;
2548 	if (softirq_count() >> (SOFTIRQ_SHIFT + 1))
2549 		trace_flags |= TRACE_FLAG_BH_OFF;
2550 
2551 	if (tif_need_resched())
2552 		trace_flags |= TRACE_FLAG_NEED_RESCHED;
2553 	if (test_preempt_need_resched())
2554 		trace_flags |= TRACE_FLAG_PREEMPT_RESCHED;
2555 	if (IS_ENABLED(CONFIG_ARCH_HAS_PREEMPT_LAZY) && tif_test_bit(TIF_NEED_RESCHED_LAZY))
2556 		trace_flags |= TRACE_FLAG_NEED_RESCHED_LAZY;
2557 	return (trace_flags << 16) | (min_t(unsigned int, pc & 0xff, 0xf)) |
2558 		(min_t(unsigned int, migration_disable_value(), 0xf)) << 4;
2559 }
2560 
2561 struct ring_buffer_event *
2562 trace_buffer_lock_reserve(struct trace_buffer *buffer,
2563 			  int type,
2564 			  unsigned long len,
2565 			  unsigned int trace_ctx)
2566 {
2567 	return __trace_buffer_lock_reserve(buffer, type, len, trace_ctx);
2568 }
2569 
2570 DEFINE_PER_CPU(struct ring_buffer_event *, trace_buffered_event);
2571 DEFINE_PER_CPU(int, trace_buffered_event_cnt);
2572 static int trace_buffered_event_ref;
2573 
2574 /**
2575  * trace_buffered_event_enable - enable buffering events
2576  *
2577  * When events are being filtered, it is quicker to use a temporary
2578  * buffer to write the event data into if there's a likely chance
2579  * that it will not be committed. The discard of the ring buffer
2580  * is not as fast as committing, and is much slower than copying
2581  * a commit.
2582  *
2583  * When an event is to be filtered, allocate per cpu buffers to
2584  * write the event data into, and if the event is filtered and discarded
2585  * it is simply dropped, otherwise, the entire data is to be committed
2586  * in one shot.
2587  */
2588 void trace_buffered_event_enable(void)
2589 {
2590 	struct ring_buffer_event *event;
2591 	struct page *page;
2592 	int cpu;
2593 
2594 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2595 
2596 	if (trace_buffered_event_ref++)
2597 		return;
2598 
2599 	for_each_tracing_cpu(cpu) {
2600 		page = alloc_pages_node(cpu_to_node(cpu),
2601 					GFP_KERNEL | __GFP_NORETRY, 0);
2602 		/* This is just an optimization and can handle failures */
2603 		if (!page) {
2604 			pr_err("Failed to allocate event buffer\n");
2605 			break;
2606 		}
2607 
2608 		event = page_address(page);
2609 		memset(event, 0, sizeof(*event));
2610 
2611 		per_cpu(trace_buffered_event, cpu) = event;
2612 
2613 		preempt_disable();
2614 		if (cpu == smp_processor_id() &&
2615 		    __this_cpu_read(trace_buffered_event) !=
2616 		    per_cpu(trace_buffered_event, cpu))
2617 			WARN_ON_ONCE(1);
2618 		preempt_enable();
2619 	}
2620 }
2621 
2622 static void enable_trace_buffered_event(void *data)
2623 {
2624 	/* Probably not needed, but do it anyway */
2625 	smp_rmb();
2626 	this_cpu_dec(trace_buffered_event_cnt);
2627 }
2628 
2629 static void disable_trace_buffered_event(void *data)
2630 {
2631 	this_cpu_inc(trace_buffered_event_cnt);
2632 }
2633 
2634 /**
2635  * trace_buffered_event_disable - disable buffering events
2636  *
2637  * When a filter is removed, it is faster to not use the buffered
2638  * events, and to commit directly into the ring buffer. Free up
2639  * the temp buffers when there are no more users. This requires
2640  * special synchronization with current events.
2641  */
2642 void trace_buffered_event_disable(void)
2643 {
2644 	int cpu;
2645 
2646 	WARN_ON_ONCE(!mutex_is_locked(&event_mutex));
2647 
2648 	if (WARN_ON_ONCE(!trace_buffered_event_ref))
2649 		return;
2650 
2651 	if (--trace_buffered_event_ref)
2652 		return;
2653 
2654 	/* For each CPU, set the buffer as used. */
2655 	on_each_cpu_mask(tracing_buffer_mask, disable_trace_buffered_event,
2656 			 NULL, true);
2657 
2658 	/* Wait for all current users to finish */
2659 	synchronize_rcu();
2660 
2661 	for_each_tracing_cpu(cpu) {
2662 		free_page((unsigned long)per_cpu(trace_buffered_event, cpu));
2663 		per_cpu(trace_buffered_event, cpu) = NULL;
2664 	}
2665 
2666 	/*
2667 	 * Wait for all CPUs that potentially started checking if they can use
2668 	 * their event buffer only after the previous synchronize_rcu() call and
2669 	 * they still read a valid pointer from trace_buffered_event. It must be
2670 	 * ensured they don't see cleared trace_buffered_event_cnt else they
2671 	 * could wrongly decide to use the pointed-to buffer which is now freed.
2672 	 */
2673 	synchronize_rcu();
2674 
2675 	/* For each CPU, relinquish the buffer */
2676 	on_each_cpu_mask(tracing_buffer_mask, enable_trace_buffered_event, NULL,
2677 			 true);
2678 }
2679 
2680 static struct trace_buffer *temp_buffer;
2681 
2682 struct ring_buffer_event *
2683 trace_event_buffer_lock_reserve(struct trace_buffer **current_rb,
2684 			  struct trace_event_file *trace_file,
2685 			  int type, unsigned long len,
2686 			  unsigned int trace_ctx)
2687 {
2688 	struct ring_buffer_event *entry;
2689 	struct trace_array *tr = trace_file->tr;
2690 	int val;
2691 
2692 	*current_rb = tr->array_buffer.buffer;
2693 
2694 	if (!tr->no_filter_buffering_ref &&
2695 	    (trace_file->flags & (EVENT_FILE_FL_SOFT_DISABLED | EVENT_FILE_FL_FILTERED))) {
2696 		preempt_disable_notrace();
2697 		/*
2698 		 * Filtering is on, so try to use the per cpu buffer first.
2699 		 * This buffer will simulate a ring_buffer_event,
2700 		 * where the type_len is zero and the array[0] will
2701 		 * hold the full length.
2702 		 * (see include/linux/ring-buffer.h for details on
2703 		 *  how the ring_buffer_event is structured).
2704 		 *
2705 		 * Using a temp buffer during filtering and copying it
2706 		 * on a matched filter is quicker than writing directly
2707 		 * into the ring buffer and then discarding it when
2708 		 * it doesn't match. That is because the discard
2709 		 * requires several atomic operations to get right.
2710 		 * Copying on match and doing nothing on a failed match
2711 		 * is still quicker than no copy on match, but having
2712 		 * to discard out of the ring buffer on a failed match.
2713 		 */
2714 		if ((entry = __this_cpu_read(trace_buffered_event))) {
2715 			int max_len = PAGE_SIZE - struct_size(entry, array, 1);
2716 
2717 			val = this_cpu_inc_return(trace_buffered_event_cnt);
2718 
2719 			/*
2720 			 * Preemption is disabled, but interrupts and NMIs
2721 			 * can still come in now. If that happens after
2722 			 * the above increment, then it will have to go
2723 			 * back to the old method of allocating the event
2724 			 * on the ring buffer, and if the filter fails, it
2725 			 * will have to call ring_buffer_discard_commit()
2726 			 * to remove it.
2727 			 *
2728 			 * Need to also check the unlikely case that the
2729 			 * length is bigger than the temp buffer size.
2730 			 * If that happens, then the reserve is pretty much
2731 			 * guaranteed to fail, as the ring buffer currently
2732 			 * only allows events less than a page. But that may
2733 			 * change in the future, so let the ring buffer reserve
2734 			 * handle the failure in that case.
2735 			 */
2736 			if (val == 1 && likely(len <= max_len)) {
2737 				trace_event_setup(entry, type, trace_ctx);
2738 				entry->array[0] = len;
2739 				/* Return with preemption disabled */
2740 				return entry;
2741 			}
2742 			this_cpu_dec(trace_buffered_event_cnt);
2743 		}
2744 		/* __trace_buffer_lock_reserve() disables preemption */
2745 		preempt_enable_notrace();
2746 	}
2747 
2748 	entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2749 					    trace_ctx);
2750 	/*
2751 	 * If tracing is off, but we have triggers enabled
2752 	 * we still need to look at the event data. Use the temp_buffer
2753 	 * to store the trace event for the trigger to use. It's recursive
2754 	 * safe and will not be recorded anywhere.
2755 	 */
2756 	if (!entry && trace_file->flags & EVENT_FILE_FL_TRIGGER_COND) {
2757 		*current_rb = temp_buffer;
2758 		entry = __trace_buffer_lock_reserve(*current_rb, type, len,
2759 						    trace_ctx);
2760 	}
2761 	return entry;
2762 }
2763 EXPORT_SYMBOL_GPL(trace_event_buffer_lock_reserve);
2764 
2765 static DEFINE_RAW_SPINLOCK(tracepoint_iter_lock);
2766 static DEFINE_MUTEX(tracepoint_printk_mutex);
2767 
2768 static void output_printk(struct trace_event_buffer *fbuffer)
2769 {
2770 	struct trace_event_call *event_call;
2771 	struct trace_event_file *file;
2772 	struct trace_event *event;
2773 	unsigned long flags;
2774 	struct trace_iterator *iter = tracepoint_print_iter;
2775 
2776 	/* We should never get here if iter is NULL */
2777 	if (WARN_ON_ONCE(!iter))
2778 		return;
2779 
2780 	event_call = fbuffer->trace_file->event_call;
2781 	if (!event_call || !event_call->event.funcs ||
2782 	    !event_call->event.funcs->trace)
2783 		return;
2784 
2785 	file = fbuffer->trace_file;
2786 	if (test_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags) ||
2787 	    (unlikely(file->flags & EVENT_FILE_FL_FILTERED) &&
2788 	     !filter_match_preds(file->filter, fbuffer->entry)))
2789 		return;
2790 
2791 	event = &fbuffer->trace_file->event_call->event;
2792 
2793 	raw_spin_lock_irqsave(&tracepoint_iter_lock, flags);
2794 	trace_seq_init(&iter->seq);
2795 	iter->ent = fbuffer->entry;
2796 	event_call->event.funcs->trace(iter, 0, event);
2797 	trace_seq_putc(&iter->seq, 0);
2798 	printk("%s", iter->seq.buffer);
2799 
2800 	raw_spin_unlock_irqrestore(&tracepoint_iter_lock, flags);
2801 }
2802 
2803 int tracepoint_printk_sysctl(const struct ctl_table *table, int write,
2804 			     void *buffer, size_t *lenp,
2805 			     loff_t *ppos)
2806 {
2807 	int save_tracepoint_printk;
2808 	int ret;
2809 
2810 	mutex_lock(&tracepoint_printk_mutex);
2811 	save_tracepoint_printk = tracepoint_printk;
2812 
2813 	ret = proc_dointvec(table, write, buffer, lenp, ppos);
2814 
2815 	/*
2816 	 * This will force exiting early, as tracepoint_printk
2817 	 * is always zero when tracepoint_printk_iter is not allocated
2818 	 */
2819 	if (!tracepoint_print_iter)
2820 		tracepoint_printk = 0;
2821 
2822 	if (save_tracepoint_printk == tracepoint_printk)
2823 		goto out;
2824 
2825 	if (tracepoint_printk)
2826 		static_key_enable(&tracepoint_printk_key.key);
2827 	else
2828 		static_key_disable(&tracepoint_printk_key.key);
2829 
2830  out:
2831 	mutex_unlock(&tracepoint_printk_mutex);
2832 
2833 	return ret;
2834 }
2835 
2836 void trace_event_buffer_commit(struct trace_event_buffer *fbuffer)
2837 {
2838 	enum event_trigger_type tt = ETT_NONE;
2839 	struct trace_event_file *file = fbuffer->trace_file;
2840 
2841 	if (__event_trigger_test_discard(file, fbuffer->buffer, fbuffer->event,
2842 			fbuffer->entry, &tt))
2843 		goto discard;
2844 
2845 	if (static_key_false(&tracepoint_printk_key.key))
2846 		output_printk(fbuffer);
2847 
2848 	if (static_branch_unlikely(&trace_event_exports_enabled))
2849 		ftrace_exports(fbuffer->event, TRACE_EXPORT_EVENT);
2850 
2851 	trace_buffer_unlock_commit_regs(file->tr, fbuffer->buffer,
2852 			fbuffer->event, fbuffer->trace_ctx, fbuffer->regs);
2853 
2854 discard:
2855 	if (tt)
2856 		event_triggers_post_call(file, tt);
2857 
2858 }
2859 EXPORT_SYMBOL_GPL(trace_event_buffer_commit);
2860 
2861 /*
2862  * Skip 3:
2863  *
2864  *   trace_buffer_unlock_commit_regs()
2865  *   trace_event_buffer_commit()
2866  *   trace_event_raw_event_xxx()
2867  */
2868 # define STACK_SKIP 3
2869 
2870 void trace_buffer_unlock_commit_regs(struct trace_array *tr,
2871 				     struct trace_buffer *buffer,
2872 				     struct ring_buffer_event *event,
2873 				     unsigned int trace_ctx,
2874 				     struct pt_regs *regs)
2875 {
2876 	__buffer_unlock_commit(buffer, event);
2877 
2878 	/*
2879 	 * If regs is not set, then skip the necessary functions.
2880 	 * Note, we can still get here via blktrace, wakeup tracer
2881 	 * and mmiotrace, but that's ok if they lose a function or
2882 	 * two. They are not that meaningful.
2883 	 */
2884 	ftrace_trace_stack(tr, buffer, trace_ctx, regs ? 0 : STACK_SKIP, regs);
2885 	ftrace_trace_userstack(tr, buffer, trace_ctx);
2886 }
2887 
2888 /*
2889  * Similar to trace_buffer_unlock_commit_regs() but do not dump stack.
2890  */
2891 void
2892 trace_buffer_unlock_commit_nostack(struct trace_buffer *buffer,
2893 				   struct ring_buffer_event *event)
2894 {
2895 	__buffer_unlock_commit(buffer, event);
2896 }
2897 
2898 void
2899 trace_function(struct trace_array *tr, unsigned long ip, unsigned long
2900 	       parent_ip, unsigned int trace_ctx)
2901 {
2902 	struct trace_buffer *buffer = tr->array_buffer.buffer;
2903 	struct ring_buffer_event *event;
2904 	struct ftrace_entry *entry;
2905 
2906 	event = __trace_buffer_lock_reserve(buffer, TRACE_FN, sizeof(*entry),
2907 					    trace_ctx);
2908 	if (!event)
2909 		return;
2910 	entry	= ring_buffer_event_data(event);
2911 	entry->ip			= ip;
2912 	entry->parent_ip		= parent_ip;
2913 
2914 	if (static_branch_unlikely(&trace_function_exports_enabled))
2915 		ftrace_exports(event, TRACE_EXPORT_FUNCTION);
2916 	__buffer_unlock_commit(buffer, event);
2917 }
2918 
2919 #ifdef CONFIG_STACKTRACE
2920 
2921 /* Allow 4 levels of nesting: normal, softirq, irq, NMI */
2922 #define FTRACE_KSTACK_NESTING	4
2923 
2924 #define FTRACE_KSTACK_ENTRIES	(SZ_4K / FTRACE_KSTACK_NESTING)
2925 
2926 struct ftrace_stack {
2927 	unsigned long		calls[FTRACE_KSTACK_ENTRIES];
2928 };
2929 
2930 
2931 struct ftrace_stacks {
2932 	struct ftrace_stack	stacks[FTRACE_KSTACK_NESTING];
2933 };
2934 
2935 static DEFINE_PER_CPU(struct ftrace_stacks, ftrace_stacks);
2936 static DEFINE_PER_CPU(int, ftrace_stack_reserve);
2937 
2938 static void __ftrace_trace_stack(struct trace_array *tr,
2939 				 struct trace_buffer *buffer,
2940 				 unsigned int trace_ctx,
2941 				 int skip, struct pt_regs *regs)
2942 {
2943 	struct ring_buffer_event *event;
2944 	unsigned int size, nr_entries;
2945 	struct ftrace_stack *fstack;
2946 	struct stack_entry *entry;
2947 	int stackidx;
2948 
2949 	/*
2950 	 * Add one, for this function and the call to save_stack_trace()
2951 	 * If regs is set, then these functions will not be in the way.
2952 	 */
2953 #ifndef CONFIG_UNWINDER_ORC
2954 	if (!regs)
2955 		skip++;
2956 #endif
2957 
2958 	preempt_disable_notrace();
2959 
2960 	stackidx = __this_cpu_inc_return(ftrace_stack_reserve) - 1;
2961 
2962 	/* This should never happen. If it does, yell once and skip */
2963 	if (WARN_ON_ONCE(stackidx >= FTRACE_KSTACK_NESTING))
2964 		goto out;
2965 
2966 	/*
2967 	 * The above __this_cpu_inc_return() is 'atomic' cpu local. An
2968 	 * interrupt will either see the value pre increment or post
2969 	 * increment. If the interrupt happens pre increment it will have
2970 	 * restored the counter when it returns.  We just need a barrier to
2971 	 * keep gcc from moving things around.
2972 	 */
2973 	barrier();
2974 
2975 	fstack = this_cpu_ptr(ftrace_stacks.stacks) + stackidx;
2976 	size = ARRAY_SIZE(fstack->calls);
2977 
2978 	if (regs) {
2979 		nr_entries = stack_trace_save_regs(regs, fstack->calls,
2980 						   size, skip);
2981 	} else {
2982 		nr_entries = stack_trace_save(fstack->calls, size, skip);
2983 	}
2984 
2985 #ifdef CONFIG_DYNAMIC_FTRACE
2986 	/* Mark entry of stack trace as trampoline code */
2987 	if (tr->ops && tr->ops->trampoline) {
2988 		unsigned long tramp_start = tr->ops->trampoline;
2989 		unsigned long tramp_end = tramp_start + tr->ops->trampoline_size;
2990 		unsigned long *calls = fstack->calls;
2991 
2992 		for (int i = 0; i < nr_entries; i++) {
2993 			if (calls[i] >= tramp_start && calls[i] < tramp_end)
2994 				calls[i] = FTRACE_TRAMPOLINE_MARKER;
2995 		}
2996 	}
2997 #endif
2998 
2999 	event = __trace_buffer_lock_reserve(buffer, TRACE_STACK,
3000 				    struct_size(entry, caller, nr_entries),
3001 				    trace_ctx);
3002 	if (!event)
3003 		goto out;
3004 	entry = ring_buffer_event_data(event);
3005 
3006 	entry->size = nr_entries;
3007 	memcpy(&entry->caller, fstack->calls,
3008 	       flex_array_size(entry, caller, nr_entries));
3009 
3010 	__buffer_unlock_commit(buffer, event);
3011 
3012  out:
3013 	/* Again, don't let gcc optimize things here */
3014 	barrier();
3015 	__this_cpu_dec(ftrace_stack_reserve);
3016 	preempt_enable_notrace();
3017 
3018 }
3019 
3020 static inline void ftrace_trace_stack(struct trace_array *tr,
3021 				      struct trace_buffer *buffer,
3022 				      unsigned int trace_ctx,
3023 				      int skip, struct pt_regs *regs)
3024 {
3025 	if (!(tr->trace_flags & TRACE_ITER_STACKTRACE))
3026 		return;
3027 
3028 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, regs);
3029 }
3030 
3031 void __trace_stack(struct trace_array *tr, unsigned int trace_ctx,
3032 		   int skip)
3033 {
3034 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3035 
3036 	if (rcu_is_watching()) {
3037 		__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3038 		return;
3039 	}
3040 
3041 	if (WARN_ON_ONCE(IS_ENABLED(CONFIG_GENERIC_ENTRY)))
3042 		return;
3043 
3044 	/*
3045 	 * When an NMI triggers, RCU is enabled via ct_nmi_enter(),
3046 	 * but if the above rcu_is_watching() failed, then the NMI
3047 	 * triggered someplace critical, and ct_irq_enter() should
3048 	 * not be called from NMI.
3049 	 */
3050 	if (unlikely(in_nmi()))
3051 		return;
3052 
3053 	ct_irq_enter_irqson();
3054 	__ftrace_trace_stack(tr, buffer, trace_ctx, skip, NULL);
3055 	ct_irq_exit_irqson();
3056 }
3057 
3058 /**
3059  * trace_dump_stack - record a stack back trace in the trace buffer
3060  * @skip: Number of functions to skip (helper handlers)
3061  */
3062 void trace_dump_stack(int skip)
3063 {
3064 	if (tracing_disabled || tracing_selftest_running)
3065 		return;
3066 
3067 #ifndef CONFIG_UNWINDER_ORC
3068 	/* Skip 1 to skip this function. */
3069 	skip++;
3070 #endif
3071 	__ftrace_trace_stack(printk_trace, printk_trace->array_buffer.buffer,
3072 				tracing_gen_ctx(), skip, NULL);
3073 }
3074 EXPORT_SYMBOL_GPL(trace_dump_stack);
3075 
3076 #ifdef CONFIG_USER_STACKTRACE_SUPPORT
3077 static DEFINE_PER_CPU(int, user_stack_count);
3078 
3079 static void
3080 ftrace_trace_userstack(struct trace_array *tr,
3081 		       struct trace_buffer *buffer, unsigned int trace_ctx)
3082 {
3083 	struct ring_buffer_event *event;
3084 	struct userstack_entry *entry;
3085 
3086 	if (!(tr->trace_flags & TRACE_ITER_USERSTACKTRACE))
3087 		return;
3088 
3089 	/*
3090 	 * NMIs can not handle page faults, even with fix ups.
3091 	 * The save user stack can (and often does) fault.
3092 	 */
3093 	if (unlikely(in_nmi()))
3094 		return;
3095 
3096 	/*
3097 	 * prevent recursion, since the user stack tracing may
3098 	 * trigger other kernel events.
3099 	 */
3100 	preempt_disable();
3101 	if (__this_cpu_read(user_stack_count))
3102 		goto out;
3103 
3104 	__this_cpu_inc(user_stack_count);
3105 
3106 	event = __trace_buffer_lock_reserve(buffer, TRACE_USER_STACK,
3107 					    sizeof(*entry), trace_ctx);
3108 	if (!event)
3109 		goto out_drop_count;
3110 	entry	= ring_buffer_event_data(event);
3111 
3112 	entry->tgid		= current->tgid;
3113 	memset(&entry->caller, 0, sizeof(entry->caller));
3114 
3115 	stack_trace_save_user(entry->caller, FTRACE_STACK_ENTRIES);
3116 	__buffer_unlock_commit(buffer, event);
3117 
3118  out_drop_count:
3119 	__this_cpu_dec(user_stack_count);
3120  out:
3121 	preempt_enable();
3122 }
3123 #else /* CONFIG_USER_STACKTRACE_SUPPORT */
3124 static void ftrace_trace_userstack(struct trace_array *tr,
3125 				   struct trace_buffer *buffer,
3126 				   unsigned int trace_ctx)
3127 {
3128 }
3129 #endif /* !CONFIG_USER_STACKTRACE_SUPPORT */
3130 
3131 #endif /* CONFIG_STACKTRACE */
3132 
3133 static inline void
3134 func_repeats_set_delta_ts(struct func_repeats_entry *entry,
3135 			  unsigned long long delta)
3136 {
3137 	entry->bottom_delta_ts = delta & U32_MAX;
3138 	entry->top_delta_ts = (delta >> 32);
3139 }
3140 
3141 void trace_last_func_repeats(struct trace_array *tr,
3142 			     struct trace_func_repeats *last_info,
3143 			     unsigned int trace_ctx)
3144 {
3145 	struct trace_buffer *buffer = tr->array_buffer.buffer;
3146 	struct func_repeats_entry *entry;
3147 	struct ring_buffer_event *event;
3148 	u64 delta;
3149 
3150 	event = __trace_buffer_lock_reserve(buffer, TRACE_FUNC_REPEATS,
3151 					    sizeof(*entry), trace_ctx);
3152 	if (!event)
3153 		return;
3154 
3155 	delta = ring_buffer_event_time_stamp(buffer, event) -
3156 		last_info->ts_last_call;
3157 
3158 	entry = ring_buffer_event_data(event);
3159 	entry->ip = last_info->ip;
3160 	entry->parent_ip = last_info->parent_ip;
3161 	entry->count = last_info->count;
3162 	func_repeats_set_delta_ts(entry, delta);
3163 
3164 	__buffer_unlock_commit(buffer, event);
3165 }
3166 
3167 /* created for use with alloc_percpu */
3168 struct trace_buffer_struct {
3169 	int nesting;
3170 	char buffer[4][TRACE_BUF_SIZE];
3171 };
3172 
3173 static struct trace_buffer_struct __percpu *trace_percpu_buffer;
3174 
3175 /*
3176  * This allows for lockless recording.  If we're nested too deeply, then
3177  * this returns NULL.
3178  */
3179 static char *get_trace_buf(void)
3180 {
3181 	struct trace_buffer_struct *buffer = this_cpu_ptr(trace_percpu_buffer);
3182 
3183 	if (!trace_percpu_buffer || buffer->nesting >= 4)
3184 		return NULL;
3185 
3186 	buffer->nesting++;
3187 
3188 	/* Interrupts must see nesting incremented before we use the buffer */
3189 	barrier();
3190 	return &buffer->buffer[buffer->nesting - 1][0];
3191 }
3192 
3193 static void put_trace_buf(void)
3194 {
3195 	/* Don't let the decrement of nesting leak before this */
3196 	barrier();
3197 	this_cpu_dec(trace_percpu_buffer->nesting);
3198 }
3199 
3200 static int alloc_percpu_trace_buffer(void)
3201 {
3202 	struct trace_buffer_struct __percpu *buffers;
3203 
3204 	if (trace_percpu_buffer)
3205 		return 0;
3206 
3207 	buffers = alloc_percpu(struct trace_buffer_struct);
3208 	if (MEM_FAIL(!buffers, "Could not allocate percpu trace_printk buffer"))
3209 		return -ENOMEM;
3210 
3211 	trace_percpu_buffer = buffers;
3212 	return 0;
3213 }
3214 
3215 static int buffers_allocated;
3216 
3217 void trace_printk_init_buffers(void)
3218 {
3219 	if (buffers_allocated)
3220 		return;
3221 
3222 	if (alloc_percpu_trace_buffer())
3223 		return;
3224 
3225 	/* trace_printk() is for debug use only. Don't use it in production. */
3226 
3227 	pr_warn("\n");
3228 	pr_warn("**********************************************************\n");
3229 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3230 	pr_warn("**                                                      **\n");
3231 	pr_warn("** trace_printk() being used. Allocating extra memory.  **\n");
3232 	pr_warn("**                                                      **\n");
3233 	pr_warn("** This means that this is a DEBUG kernel and it is     **\n");
3234 	pr_warn("** unsafe for production use.                           **\n");
3235 	pr_warn("**                                                      **\n");
3236 	pr_warn("** If you see this message and you are not debugging    **\n");
3237 	pr_warn("** the kernel, report this immediately to your vendor!  **\n");
3238 	pr_warn("**                                                      **\n");
3239 	pr_warn("**   NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE NOTICE   **\n");
3240 	pr_warn("**********************************************************\n");
3241 
3242 	/* Expand the buffers to set size */
3243 	tracing_update_buffers(&global_trace);
3244 
3245 	buffers_allocated = 1;
3246 
3247 	/*
3248 	 * trace_printk_init_buffers() can be called by modules.
3249 	 * If that happens, then we need to start cmdline recording
3250 	 * directly here. If the global_trace.buffer is already
3251 	 * allocated here, then this was called by module code.
3252 	 */
3253 	if (global_trace.array_buffer.buffer)
3254 		tracing_start_cmdline_record();
3255 }
3256 EXPORT_SYMBOL_GPL(trace_printk_init_buffers);
3257 
3258 void trace_printk_start_comm(void)
3259 {
3260 	/* Start tracing comms if trace printk is set */
3261 	if (!buffers_allocated)
3262 		return;
3263 	tracing_start_cmdline_record();
3264 }
3265 
3266 static void trace_printk_start_stop_comm(int enabled)
3267 {
3268 	if (!buffers_allocated)
3269 		return;
3270 
3271 	if (enabled)
3272 		tracing_start_cmdline_record();
3273 	else
3274 		tracing_stop_cmdline_record();
3275 }
3276 
3277 /**
3278  * trace_vbprintk - write binary msg to tracing buffer
3279  * @ip:    The address of the caller
3280  * @fmt:   The string format to write to the buffer
3281  * @args:  Arguments for @fmt
3282  */
3283 int trace_vbprintk(unsigned long ip, const char *fmt, va_list args)
3284 {
3285 	struct ring_buffer_event *event;
3286 	struct trace_buffer *buffer;
3287 	struct trace_array *tr = READ_ONCE(printk_trace);
3288 	struct bprint_entry *entry;
3289 	unsigned int trace_ctx;
3290 	char *tbuffer;
3291 	int len = 0, size;
3292 
3293 	if (!printk_binsafe(tr))
3294 		return trace_vprintk(ip, fmt, args);
3295 
3296 	if (unlikely(tracing_selftest_running || tracing_disabled))
3297 		return 0;
3298 
3299 	/* Don't pollute graph traces with trace_vprintk internals */
3300 	pause_graph_tracing();
3301 
3302 	trace_ctx = tracing_gen_ctx();
3303 	preempt_disable_notrace();
3304 
3305 	tbuffer = get_trace_buf();
3306 	if (!tbuffer) {
3307 		len = 0;
3308 		goto out_nobuffer;
3309 	}
3310 
3311 	len = vbin_printf((u32 *)tbuffer, TRACE_BUF_SIZE/sizeof(int), fmt, args);
3312 
3313 	if (len > TRACE_BUF_SIZE/sizeof(int) || len < 0)
3314 		goto out_put;
3315 
3316 	size = sizeof(*entry) + sizeof(u32) * len;
3317 	buffer = tr->array_buffer.buffer;
3318 	ring_buffer_nest_start(buffer);
3319 	event = __trace_buffer_lock_reserve(buffer, TRACE_BPRINT, size,
3320 					    trace_ctx);
3321 	if (!event)
3322 		goto out;
3323 	entry = ring_buffer_event_data(event);
3324 	entry->ip			= ip;
3325 	entry->fmt			= fmt;
3326 
3327 	memcpy(entry->buf, tbuffer, sizeof(u32) * len);
3328 	__buffer_unlock_commit(buffer, event);
3329 	ftrace_trace_stack(tr, buffer, trace_ctx, 6, NULL);
3330 
3331 out:
3332 	ring_buffer_nest_end(buffer);
3333 out_put:
3334 	put_trace_buf();
3335 
3336 out_nobuffer:
3337 	preempt_enable_notrace();
3338 	unpause_graph_tracing();
3339 
3340 	return len;
3341 }
3342 EXPORT_SYMBOL_GPL(trace_vbprintk);
3343 
3344 __printf(3, 0)
3345 static int
3346 __trace_array_vprintk(struct trace_buffer *buffer,
3347 		      unsigned long ip, const char *fmt, va_list args)
3348 {
3349 	struct ring_buffer_event *event;
3350 	int len = 0, size;
3351 	struct print_entry *entry;
3352 	unsigned int trace_ctx;
3353 	char *tbuffer;
3354 
3355 	if (tracing_disabled)
3356 		return 0;
3357 
3358 	/* Don't pollute graph traces with trace_vprintk internals */
3359 	pause_graph_tracing();
3360 
3361 	trace_ctx = tracing_gen_ctx();
3362 	preempt_disable_notrace();
3363 
3364 
3365 	tbuffer = get_trace_buf();
3366 	if (!tbuffer) {
3367 		len = 0;
3368 		goto out_nobuffer;
3369 	}
3370 
3371 	len = vscnprintf(tbuffer, TRACE_BUF_SIZE, fmt, args);
3372 
3373 	size = sizeof(*entry) + len + 1;
3374 	ring_buffer_nest_start(buffer);
3375 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
3376 					    trace_ctx);
3377 	if (!event)
3378 		goto out;
3379 	entry = ring_buffer_event_data(event);
3380 	entry->ip = ip;
3381 
3382 	memcpy(&entry->buf, tbuffer, len + 1);
3383 	__buffer_unlock_commit(buffer, event);
3384 	ftrace_trace_stack(printk_trace, buffer, trace_ctx, 6, NULL);
3385 
3386 out:
3387 	ring_buffer_nest_end(buffer);
3388 	put_trace_buf();
3389 
3390 out_nobuffer:
3391 	preempt_enable_notrace();
3392 	unpause_graph_tracing();
3393 
3394 	return len;
3395 }
3396 
3397 __printf(3, 0)
3398 int trace_array_vprintk(struct trace_array *tr,
3399 			unsigned long ip, const char *fmt, va_list args)
3400 {
3401 	if (tracing_selftest_running && tr == &global_trace)
3402 		return 0;
3403 
3404 	return __trace_array_vprintk(tr->array_buffer.buffer, ip, fmt, args);
3405 }
3406 
3407 /**
3408  * trace_array_printk - Print a message to a specific instance
3409  * @tr: The instance trace_array descriptor
3410  * @ip: The instruction pointer that this is called from.
3411  * @fmt: The format to print (printf format)
3412  *
3413  * If a subsystem sets up its own instance, they have the right to
3414  * printk strings into their tracing instance buffer using this
3415  * function. Note, this function will not write into the top level
3416  * buffer (use trace_printk() for that), as writing into the top level
3417  * buffer should only have events that can be individually disabled.
3418  * trace_printk() is only used for debugging a kernel, and should not
3419  * be ever incorporated in normal use.
3420  *
3421  * trace_array_printk() can be used, as it will not add noise to the
3422  * top level tracing buffer.
3423  *
3424  * Note, trace_array_init_printk() must be called on @tr before this
3425  * can be used.
3426  */
3427 __printf(3, 0)
3428 int trace_array_printk(struct trace_array *tr,
3429 		       unsigned long ip, const char *fmt, ...)
3430 {
3431 	int ret;
3432 	va_list ap;
3433 
3434 	if (!tr)
3435 		return -ENOENT;
3436 
3437 	/* This is only allowed for created instances */
3438 	if (tr == &global_trace)
3439 		return 0;
3440 
3441 	if (!(tr->trace_flags & TRACE_ITER_PRINTK))
3442 		return 0;
3443 
3444 	va_start(ap, fmt);
3445 	ret = trace_array_vprintk(tr, ip, fmt, ap);
3446 	va_end(ap);
3447 	return ret;
3448 }
3449 EXPORT_SYMBOL_GPL(trace_array_printk);
3450 
3451 /**
3452  * trace_array_init_printk - Initialize buffers for trace_array_printk()
3453  * @tr: The trace array to initialize the buffers for
3454  *
3455  * As trace_array_printk() only writes into instances, they are OK to
3456  * have in the kernel (unlike trace_printk()). This needs to be called
3457  * before trace_array_printk() can be used on a trace_array.
3458  */
3459 int trace_array_init_printk(struct trace_array *tr)
3460 {
3461 	if (!tr)
3462 		return -ENOENT;
3463 
3464 	/* This is only allowed for created instances */
3465 	if (tr == &global_trace)
3466 		return -EINVAL;
3467 
3468 	return alloc_percpu_trace_buffer();
3469 }
3470 EXPORT_SYMBOL_GPL(trace_array_init_printk);
3471 
3472 __printf(3, 4)
3473 int trace_array_printk_buf(struct trace_buffer *buffer,
3474 			   unsigned long ip, const char *fmt, ...)
3475 {
3476 	int ret;
3477 	va_list ap;
3478 
3479 	if (!(printk_trace->trace_flags & TRACE_ITER_PRINTK))
3480 		return 0;
3481 
3482 	va_start(ap, fmt);
3483 	ret = __trace_array_vprintk(buffer, ip, fmt, ap);
3484 	va_end(ap);
3485 	return ret;
3486 }
3487 
3488 __printf(2, 0)
3489 int trace_vprintk(unsigned long ip, const char *fmt, va_list args)
3490 {
3491 	return trace_array_vprintk(printk_trace, ip, fmt, args);
3492 }
3493 EXPORT_SYMBOL_GPL(trace_vprintk);
3494 
3495 static void trace_iterator_increment(struct trace_iterator *iter)
3496 {
3497 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, iter->cpu);
3498 
3499 	iter->idx++;
3500 	if (buf_iter)
3501 		ring_buffer_iter_advance(buf_iter);
3502 }
3503 
3504 static struct trace_entry *
3505 peek_next_entry(struct trace_iterator *iter, int cpu, u64 *ts,
3506 		unsigned long *lost_events)
3507 {
3508 	struct ring_buffer_event *event;
3509 	struct ring_buffer_iter *buf_iter = trace_buffer_iter(iter, cpu);
3510 
3511 	if (buf_iter) {
3512 		event = ring_buffer_iter_peek(buf_iter, ts);
3513 		if (lost_events)
3514 			*lost_events = ring_buffer_iter_dropped(buf_iter) ?
3515 				(unsigned long)-1 : 0;
3516 	} else {
3517 		event = ring_buffer_peek(iter->array_buffer->buffer, cpu, ts,
3518 					 lost_events);
3519 	}
3520 
3521 	if (event) {
3522 		iter->ent_size = ring_buffer_event_length(event);
3523 		return ring_buffer_event_data(event);
3524 	}
3525 	iter->ent_size = 0;
3526 	return NULL;
3527 }
3528 
3529 static struct trace_entry *
3530 __find_next_entry(struct trace_iterator *iter, int *ent_cpu,
3531 		  unsigned long *missing_events, u64 *ent_ts)
3532 {
3533 	struct trace_buffer *buffer = iter->array_buffer->buffer;
3534 	struct trace_entry *ent, *next = NULL;
3535 	unsigned long lost_events = 0, next_lost = 0;
3536 	int cpu_file = iter->cpu_file;
3537 	u64 next_ts = 0, ts;
3538 	int next_cpu = -1;
3539 	int next_size = 0;
3540 	int cpu;
3541 
3542 	/*
3543 	 * If we are in a per_cpu trace file, don't bother by iterating over
3544 	 * all cpu and peek directly.
3545 	 */
3546 	if (cpu_file > RING_BUFFER_ALL_CPUS) {
3547 		if (ring_buffer_empty_cpu(buffer, cpu_file))
3548 			return NULL;
3549 		ent = peek_next_entry(iter, cpu_file, ent_ts, missing_events);
3550 		if (ent_cpu)
3551 			*ent_cpu = cpu_file;
3552 
3553 		return ent;
3554 	}
3555 
3556 	for_each_tracing_cpu(cpu) {
3557 
3558 		if (ring_buffer_empty_cpu(buffer, cpu))
3559 			continue;
3560 
3561 		ent = peek_next_entry(iter, cpu, &ts, &lost_events);
3562 
3563 		/*
3564 		 * Pick the entry with the smallest timestamp:
3565 		 */
3566 		if (ent && (!next || ts < next_ts)) {
3567 			next = ent;
3568 			next_cpu = cpu;
3569 			next_ts = ts;
3570 			next_lost = lost_events;
3571 			next_size = iter->ent_size;
3572 		}
3573 	}
3574 
3575 	iter->ent_size = next_size;
3576 
3577 	if (ent_cpu)
3578 		*ent_cpu = next_cpu;
3579 
3580 	if (ent_ts)
3581 		*ent_ts = next_ts;
3582 
3583 	if (missing_events)
3584 		*missing_events = next_lost;
3585 
3586 	return next;
3587 }
3588 
3589 #define STATIC_FMT_BUF_SIZE	128
3590 static char static_fmt_buf[STATIC_FMT_BUF_SIZE];
3591 
3592 char *trace_iter_expand_format(struct trace_iterator *iter)
3593 {
3594 	char *tmp;
3595 
3596 	/*
3597 	 * iter->tr is NULL when used with tp_printk, which makes
3598 	 * this get called where it is not safe to call krealloc().
3599 	 */
3600 	if (!iter->tr || iter->fmt == static_fmt_buf)
3601 		return NULL;
3602 
3603 	tmp = krealloc(iter->fmt, iter->fmt_size + STATIC_FMT_BUF_SIZE,
3604 		       GFP_KERNEL);
3605 	if (tmp) {
3606 		iter->fmt_size += STATIC_FMT_BUF_SIZE;
3607 		iter->fmt = tmp;
3608 	}
3609 
3610 	return tmp;
3611 }
3612 
3613 /* Returns true if the string is safe to dereference from an event */
3614 static bool trace_safe_str(struct trace_iterator *iter, const char *str,
3615 			   bool star, int len)
3616 {
3617 	unsigned long addr = (unsigned long)str;
3618 	struct trace_event *trace_event;
3619 	struct trace_event_call *event;
3620 
3621 	/* Ignore strings with no length */
3622 	if (star && !len)
3623 		return true;
3624 
3625 	/* OK if part of the event data */
3626 	if ((addr >= (unsigned long)iter->ent) &&
3627 	    (addr < (unsigned long)iter->ent + iter->ent_size))
3628 		return true;
3629 
3630 	/* OK if part of the temp seq buffer */
3631 	if ((addr >= (unsigned long)iter->tmp_seq.buffer) &&
3632 	    (addr < (unsigned long)iter->tmp_seq.buffer + TRACE_SEQ_BUFFER_SIZE))
3633 		return true;
3634 
3635 	/* Core rodata can not be freed */
3636 	if (is_kernel_rodata(addr))
3637 		return true;
3638 
3639 	if (trace_is_tracepoint_string(str))
3640 		return true;
3641 
3642 	/*
3643 	 * Now this could be a module event, referencing core module
3644 	 * data, which is OK.
3645 	 */
3646 	if (!iter->ent)
3647 		return false;
3648 
3649 	trace_event = ftrace_find_event(iter->ent->type);
3650 	if (!trace_event)
3651 		return false;
3652 
3653 	event = container_of(trace_event, struct trace_event_call, event);
3654 	if ((event->flags & TRACE_EVENT_FL_DYNAMIC) || !event->module)
3655 		return false;
3656 
3657 	/* Would rather have rodata, but this will suffice */
3658 	if (within_module_core(addr, event->module))
3659 		return true;
3660 
3661 	return false;
3662 }
3663 
3664 static DEFINE_STATIC_KEY_FALSE(trace_no_verify);
3665 
3666 static int test_can_verify_check(const char *fmt, ...)
3667 {
3668 	char buf[16];
3669 	va_list ap;
3670 	int ret;
3671 
3672 	/*
3673 	 * The verifier is dependent on vsnprintf() modifies the va_list
3674 	 * passed to it, where it is sent as a reference. Some architectures
3675 	 * (like x86_32) passes it by value, which means that vsnprintf()
3676 	 * does not modify the va_list passed to it, and the verifier
3677 	 * would then need to be able to understand all the values that
3678 	 * vsnprintf can use. If it is passed by value, then the verifier
3679 	 * is disabled.
3680 	 */
3681 	va_start(ap, fmt);
3682 	vsnprintf(buf, 16, "%d", ap);
3683 	ret = va_arg(ap, int);
3684 	va_end(ap);
3685 
3686 	return ret;
3687 }
3688 
3689 static void test_can_verify(void)
3690 {
3691 	if (!test_can_verify_check("%d %d", 0, 1)) {
3692 		pr_info("trace event string verifier disabled\n");
3693 		static_branch_inc(&trace_no_verify);
3694 	}
3695 }
3696 
3697 /**
3698  * trace_check_vprintf - Check dereferenced strings while writing to the seq buffer
3699  * @iter: The iterator that holds the seq buffer and the event being printed
3700  * @fmt: The format used to print the event
3701  * @ap: The va_list holding the data to print from @fmt.
3702  *
3703  * This writes the data into the @iter->seq buffer using the data from
3704  * @fmt and @ap. If the format has a %s, then the source of the string
3705  * is examined to make sure it is safe to print, otherwise it will
3706  * warn and print "[UNSAFE MEMORY]" in place of the dereferenced string
3707  * pointer.
3708  */
3709 void trace_check_vprintf(struct trace_iterator *iter, const char *fmt,
3710 			 va_list ap)
3711 {
3712 	long text_delta = 0;
3713 	long data_delta = 0;
3714 	const char *p = fmt;
3715 	const char *str;
3716 	bool good;
3717 	int i, j;
3718 
3719 	if (WARN_ON_ONCE(!fmt))
3720 		return;
3721 
3722 	if (static_branch_unlikely(&trace_no_verify))
3723 		goto print;
3724 
3725 	/*
3726 	 * When the kernel is booted with the tp_printk command line
3727 	 * parameter, trace events go directly through to printk().
3728 	 * It also is checked by this function, but it does not
3729 	 * have an associated trace_array (tr) for it.
3730 	 */
3731 	if (iter->tr) {
3732 		text_delta = iter->tr->text_delta;
3733 		data_delta = iter->tr->data_delta;
3734 	}
3735 
3736 	/* Don't bother checking when doing a ftrace_dump() */
3737 	if (iter->fmt == static_fmt_buf)
3738 		goto print;
3739 
3740 	while (*p) {
3741 		bool star = false;
3742 		int len = 0;
3743 
3744 		j = 0;
3745 
3746 		/*
3747 		 * We only care about %s and variants
3748 		 * as well as %p[sS] if delta is non-zero
3749 		 */
3750 		for (i = 0; p[i]; i++) {
3751 			if (i + 1 >= iter->fmt_size) {
3752 				/*
3753 				 * If we can't expand the copy buffer,
3754 				 * just print it.
3755 				 */
3756 				if (!trace_iter_expand_format(iter))
3757 					goto print;
3758 			}
3759 
3760 			if (p[i] == '\\' && p[i+1]) {
3761 				i++;
3762 				continue;
3763 			}
3764 			if (p[i] == '%') {
3765 				/* Need to test cases like %08.*s */
3766 				for (j = 1; p[i+j]; j++) {
3767 					if (isdigit(p[i+j]) ||
3768 					    p[i+j] == '.')
3769 						continue;
3770 					if (p[i+j] == '*') {
3771 						star = true;
3772 						continue;
3773 					}
3774 					break;
3775 				}
3776 				if (p[i+j] == 's')
3777 					break;
3778 
3779 				if (text_delta && p[i+1] == 'p' &&
3780 				    ((p[i+2] == 's' || p[i+2] == 'S')))
3781 					break;
3782 
3783 				star = false;
3784 			}
3785 			j = 0;
3786 		}
3787 		/* If no %s found then just print normally */
3788 		if (!p[i])
3789 			break;
3790 
3791 		/* Copy up to the %s, and print that */
3792 		strncpy(iter->fmt, p, i);
3793 		iter->fmt[i] = '\0';
3794 		trace_seq_vprintf(&iter->seq, iter->fmt, ap);
3795 
3796 		/* Add delta to %pS pointers */
3797 		if (p[i+1] == 'p') {
3798 			unsigned long addr;
3799 			char fmt[4];
3800 
3801 			fmt[0] = '%';
3802 			fmt[1] = 'p';
3803 			fmt[2] = p[i+2]; /* Either %ps or %pS */
3804 			fmt[3] = '\0';
3805 
3806 			addr = va_arg(ap, unsigned long);
3807 			addr += text_delta;
3808 			trace_seq_printf(&iter->seq, fmt, (void *)addr);
3809 
3810 			p += i + 3;
3811 			continue;
3812 		}
3813 
3814 		/*
3815 		 * If iter->seq is full, the above call no longer guarantees
3816 		 * that ap is in sync with fmt processing, and further calls
3817 		 * to va_arg() can return wrong positional arguments.
3818 		 *
3819 		 * Ensure that ap is no longer used in this case.
3820 		 */
3821 		if (iter->seq.full) {
3822 			p = "";
3823 			break;
3824 		}
3825 
3826 		if (star)
3827 			len = va_arg(ap, int);
3828 
3829 		/* The ap now points to the string data of the %s */
3830 		str = va_arg(ap, const char *);
3831 
3832 		good = trace_safe_str(iter, str, star, len);
3833 
3834 		/* Could be from the last boot */
3835 		if (data_delta && !good) {
3836 			str += data_delta;
3837 			good = trace_safe_str(iter, str, star, len);
3838 		}
3839 
3840 		/*
3841 		 * If you hit this warning, it is likely that the
3842 		 * trace event in question used %s on a string that
3843 		 * was saved at the time of the event, but may not be
3844 		 * around when the trace is read. Use __string(),
3845 		 * __assign_str() and __get_str() helpers in the TRACE_EVENT()
3846 		 * instead. See samples/trace_events/trace-events-sample.h
3847 		 * for reference.
3848 		 */
3849 		if (WARN_ONCE(!good, "fmt: '%s' current_buffer: '%s'",
3850 			      fmt, seq_buf_str(&iter->seq.seq))) {
3851 			int ret;
3852 
3853 			/* Try to safely read the string */
3854 			if (star) {
3855 				if (len + 1 > iter->fmt_size)
3856 					len = iter->fmt_size - 1;
3857 				if (len < 0)
3858 					len = 0;
3859 				ret = copy_from_kernel_nofault(iter->fmt, str, len);
3860 				iter->fmt[len] = 0;
3861 				star = false;
3862 			} else {
3863 				ret = strncpy_from_kernel_nofault(iter->fmt, str,
3864 								  iter->fmt_size);
3865 			}
3866 			if (ret < 0)
3867 				trace_seq_printf(&iter->seq, "(0x%px)", str);
3868 			else
3869 				trace_seq_printf(&iter->seq, "(0x%px:%s)",
3870 						 str, iter->fmt);
3871 			str = "[UNSAFE-MEMORY]";
3872 			strcpy(iter->fmt, "%s");
3873 		} else {
3874 			strncpy(iter->fmt, p + i, j + 1);
3875 			iter->fmt[j+1] = '\0';
3876 		}
3877 		if (star)
3878 			trace_seq_printf(&iter->seq, iter->fmt, len, str);
3879 		else
3880 			trace_seq_printf(&iter->seq, iter->fmt, str);
3881 
3882 		p += i + j + 1;
3883 	}
3884  print:
3885 	if (*p)
3886 		trace_seq_vprintf(&iter->seq, p, ap);
3887 }
3888 
3889 const char *trace_event_format(struct trace_iterator *iter, const char *fmt)
3890 {
3891 	const char *p, *new_fmt;
3892 	char *q;
3893 
3894 	if (WARN_ON_ONCE(!fmt))
3895 		return fmt;
3896 
3897 	if (!iter->tr || iter->tr->trace_flags & TRACE_ITER_HASH_PTR)
3898 		return fmt;
3899 
3900 	p = fmt;
3901 	new_fmt = q = iter->fmt;
3902 	while (*p) {
3903 		if (unlikely(q - new_fmt + 3 > iter->fmt_size)) {
3904 			if (!trace_iter_expand_format(iter))
3905 				return fmt;
3906 
3907 			q += iter->fmt - new_fmt;
3908 			new_fmt = iter->fmt;
3909 		}
3910 
3911 		*q++ = *p++;
3912 
3913 		/* Replace %p with %px */
3914 		if (p[-1] == '%') {
3915 			if (p[0] == '%') {
3916 				*q++ = *p++;
3917 			} else if (p[0] == 'p' && !isalnum(p[1])) {
3918 				*q++ = *p++;
3919 				*q++ = 'x';
3920 			}
3921 		}
3922 	}
3923 	*q = '\0';
3924 
3925 	return new_fmt;
3926 }
3927 
3928 #define STATIC_TEMP_BUF_SIZE	128
3929 static char static_temp_buf[STATIC_TEMP_BUF_SIZE] __aligned(4);
3930 
3931 /* Find the next real entry, without updating the iterator itself */
3932 struct trace_entry *trace_find_next_entry(struct trace_iterator *iter,
3933 					  int *ent_cpu, u64 *ent_ts)
3934 {
3935 	/* __find_next_entry will reset ent_size */
3936 	int ent_size = iter->ent_size;
3937 	struct trace_entry *entry;
3938 
3939 	/*
3940 	 * If called from ftrace_dump(), then the iter->temp buffer
3941 	 * will be the static_temp_buf and not created from kmalloc.
3942 	 * If the entry size is greater than the buffer, we can
3943 	 * not save it. Just return NULL in that case. This is only
3944 	 * used to add markers when two consecutive events' time
3945 	 * stamps have a large delta. See trace_print_lat_context()
3946 	 */
3947 	if (iter->temp == static_temp_buf &&
3948 	    STATIC_TEMP_BUF_SIZE < ent_size)
3949 		return NULL;
3950 
3951 	/*
3952 	 * The __find_next_entry() may call peek_next_entry(), which may
3953 	 * call ring_buffer_peek() that may make the contents of iter->ent
3954 	 * undefined. Need to copy iter->ent now.
3955 	 */
3956 	if (iter->ent && iter->ent != iter->temp) {
3957 		if ((!iter->temp || iter->temp_size < iter->ent_size) &&
3958 		    !WARN_ON_ONCE(iter->temp == static_temp_buf)) {
3959 			void *temp;
3960 			temp = kmalloc(iter->ent_size, GFP_KERNEL);
3961 			if (!temp)
3962 				return NULL;
3963 			kfree(iter->temp);
3964 			iter->temp = temp;
3965 			iter->temp_size = iter->ent_size;
3966 		}
3967 		memcpy(iter->temp, iter->ent, iter->ent_size);
3968 		iter->ent = iter->temp;
3969 	}
3970 	entry = __find_next_entry(iter, ent_cpu, NULL, ent_ts);
3971 	/* Put back the original ent_size */
3972 	iter->ent_size = ent_size;
3973 
3974 	return entry;
3975 }
3976 
3977 /* Find the next real entry, and increment the iterator to the next entry */
3978 void *trace_find_next_entry_inc(struct trace_iterator *iter)
3979 {
3980 	iter->ent = __find_next_entry(iter, &iter->cpu,
3981 				      &iter->lost_events, &iter->ts);
3982 
3983 	if (iter->ent)
3984 		trace_iterator_increment(iter);
3985 
3986 	return iter->ent ? iter : NULL;
3987 }
3988 
3989 static void trace_consume(struct trace_iterator *iter)
3990 {
3991 	ring_buffer_consume(iter->array_buffer->buffer, iter->cpu, &iter->ts,
3992 			    &iter->lost_events);
3993 }
3994 
3995 static void *s_next(struct seq_file *m, void *v, loff_t *pos)
3996 {
3997 	struct trace_iterator *iter = m->private;
3998 	int i = (int)*pos;
3999 	void *ent;
4000 
4001 	WARN_ON_ONCE(iter->leftover);
4002 
4003 	(*pos)++;
4004 
4005 	/* can't go backwards */
4006 	if (iter->idx > i)
4007 		return NULL;
4008 
4009 	if (iter->idx < 0)
4010 		ent = trace_find_next_entry_inc(iter);
4011 	else
4012 		ent = iter;
4013 
4014 	while (ent && iter->idx < i)
4015 		ent = trace_find_next_entry_inc(iter);
4016 
4017 	iter->pos = *pos;
4018 
4019 	return ent;
4020 }
4021 
4022 void tracing_iter_reset(struct trace_iterator *iter, int cpu)
4023 {
4024 	struct ring_buffer_iter *buf_iter;
4025 	unsigned long entries = 0;
4026 	u64 ts;
4027 
4028 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = 0;
4029 
4030 	buf_iter = trace_buffer_iter(iter, cpu);
4031 	if (!buf_iter)
4032 		return;
4033 
4034 	ring_buffer_iter_reset(buf_iter);
4035 
4036 	/*
4037 	 * We could have the case with the max latency tracers
4038 	 * that a reset never took place on a cpu. This is evident
4039 	 * by the timestamp being before the start of the buffer.
4040 	 */
4041 	while (ring_buffer_iter_peek(buf_iter, &ts)) {
4042 		if (ts >= iter->array_buffer->time_start)
4043 			break;
4044 		entries++;
4045 		ring_buffer_iter_advance(buf_iter);
4046 		/* This could be a big loop */
4047 		cond_resched();
4048 	}
4049 
4050 	per_cpu_ptr(iter->array_buffer->data, cpu)->skipped_entries = entries;
4051 }
4052 
4053 /*
4054  * The current tracer is copied to avoid a global locking
4055  * all around.
4056  */
4057 static void *s_start(struct seq_file *m, loff_t *pos)
4058 {
4059 	struct trace_iterator *iter = m->private;
4060 	struct trace_array *tr = iter->tr;
4061 	int cpu_file = iter->cpu_file;
4062 	void *p = NULL;
4063 	loff_t l = 0;
4064 	int cpu;
4065 
4066 	mutex_lock(&trace_types_lock);
4067 	if (unlikely(tr->current_trace != iter->trace)) {
4068 		/* Close iter->trace before switching to the new current tracer */
4069 		if (iter->trace->close)
4070 			iter->trace->close(iter);
4071 		iter->trace = tr->current_trace;
4072 		/* Reopen the new current tracer */
4073 		if (iter->trace->open)
4074 			iter->trace->open(iter);
4075 	}
4076 	mutex_unlock(&trace_types_lock);
4077 
4078 #ifdef CONFIG_TRACER_MAX_TRACE
4079 	if (iter->snapshot && iter->trace->use_max_tr)
4080 		return ERR_PTR(-EBUSY);
4081 #endif
4082 
4083 	if (*pos != iter->pos) {
4084 		iter->ent = NULL;
4085 		iter->cpu = 0;
4086 		iter->idx = -1;
4087 
4088 		if (cpu_file == RING_BUFFER_ALL_CPUS) {
4089 			for_each_tracing_cpu(cpu)
4090 				tracing_iter_reset(iter, cpu);
4091 		} else
4092 			tracing_iter_reset(iter, cpu_file);
4093 
4094 		iter->leftover = 0;
4095 		for (p = iter; p && l < *pos; p = s_next(m, p, &l))
4096 			;
4097 
4098 	} else {
4099 		/*
4100 		 * If we overflowed the seq_file before, then we want
4101 		 * to just reuse the trace_seq buffer again.
4102 		 */
4103 		if (iter->leftover)
4104 			p = iter;
4105 		else {
4106 			l = *pos - 1;
4107 			p = s_next(m, p, &l);
4108 		}
4109 	}
4110 
4111 	trace_event_read_lock();
4112 	trace_access_lock(cpu_file);
4113 	return p;
4114 }
4115 
4116 static void s_stop(struct seq_file *m, void *p)
4117 {
4118 	struct trace_iterator *iter = m->private;
4119 
4120 #ifdef CONFIG_TRACER_MAX_TRACE
4121 	if (iter->snapshot && iter->trace->use_max_tr)
4122 		return;
4123 #endif
4124 
4125 	trace_access_unlock(iter->cpu_file);
4126 	trace_event_read_unlock();
4127 }
4128 
4129 static void
4130 get_total_entries_cpu(struct array_buffer *buf, unsigned long *total,
4131 		      unsigned long *entries, int cpu)
4132 {
4133 	unsigned long count;
4134 
4135 	count = ring_buffer_entries_cpu(buf->buffer, cpu);
4136 	/*
4137 	 * If this buffer has skipped entries, then we hold all
4138 	 * entries for the trace and we need to ignore the
4139 	 * ones before the time stamp.
4140 	 */
4141 	if (per_cpu_ptr(buf->data, cpu)->skipped_entries) {
4142 		count -= per_cpu_ptr(buf->data, cpu)->skipped_entries;
4143 		/* total is the same as the entries */
4144 		*total = count;
4145 	} else
4146 		*total = count +
4147 			ring_buffer_overrun_cpu(buf->buffer, cpu);
4148 	*entries = count;
4149 }
4150 
4151 static void
4152 get_total_entries(struct array_buffer *buf,
4153 		  unsigned long *total, unsigned long *entries)
4154 {
4155 	unsigned long t, e;
4156 	int cpu;
4157 
4158 	*total = 0;
4159 	*entries = 0;
4160 
4161 	for_each_tracing_cpu(cpu) {
4162 		get_total_entries_cpu(buf, &t, &e, cpu);
4163 		*total += t;
4164 		*entries += e;
4165 	}
4166 }
4167 
4168 unsigned long trace_total_entries_cpu(struct trace_array *tr, int cpu)
4169 {
4170 	unsigned long total, entries;
4171 
4172 	if (!tr)
4173 		tr = &global_trace;
4174 
4175 	get_total_entries_cpu(&tr->array_buffer, &total, &entries, cpu);
4176 
4177 	return entries;
4178 }
4179 
4180 unsigned long trace_total_entries(struct trace_array *tr)
4181 {
4182 	unsigned long total, entries;
4183 
4184 	if (!tr)
4185 		tr = &global_trace;
4186 
4187 	get_total_entries(&tr->array_buffer, &total, &entries);
4188 
4189 	return entries;
4190 }
4191 
4192 static void print_lat_help_header(struct seq_file *m)
4193 {
4194 	seq_puts(m, "#                    _------=> CPU#            \n"
4195 		    "#                   / _-----=> irqs-off/BH-disabled\n"
4196 		    "#                  | / _----=> need-resched    \n"
4197 		    "#                  || / _---=> hardirq/softirq \n"
4198 		    "#                  ||| / _--=> preempt-depth   \n"
4199 		    "#                  |||| / _-=> migrate-disable \n"
4200 		    "#                  ||||| /     delay           \n"
4201 		    "#  cmd     pid     |||||| time  |   caller     \n"
4202 		    "#     \\   /        ||||||  \\    |    /       \n");
4203 }
4204 
4205 static void print_event_info(struct array_buffer *buf, struct seq_file *m)
4206 {
4207 	unsigned long total;
4208 	unsigned long entries;
4209 
4210 	get_total_entries(buf, &total, &entries);
4211 	seq_printf(m, "# entries-in-buffer/entries-written: %lu/%lu   #P:%d\n",
4212 		   entries, total, num_online_cpus());
4213 	seq_puts(m, "#\n");
4214 }
4215 
4216 static void print_func_help_header(struct array_buffer *buf, struct seq_file *m,
4217 				   unsigned int flags)
4218 {
4219 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4220 
4221 	print_event_info(buf, m);
4222 
4223 	seq_printf(m, "#           TASK-PID    %s CPU#     TIMESTAMP  FUNCTION\n", tgid ? "   TGID   " : "");
4224 	seq_printf(m, "#              | |      %s   |         |         |\n",      tgid ? "     |    " : "");
4225 }
4226 
4227 static void print_func_help_header_irq(struct array_buffer *buf, struct seq_file *m,
4228 				       unsigned int flags)
4229 {
4230 	bool tgid = flags & TRACE_ITER_RECORD_TGID;
4231 	static const char space[] = "            ";
4232 	int prec = tgid ? 12 : 2;
4233 
4234 	print_event_info(buf, m);
4235 
4236 	seq_printf(m, "#                            %.*s  _-----=> irqs-off/BH-disabled\n", prec, space);
4237 	seq_printf(m, "#                            %.*s / _----=> need-resched\n", prec, space);
4238 	seq_printf(m, "#                            %.*s| / _---=> hardirq/softirq\n", prec, space);
4239 	seq_printf(m, "#                            %.*s|| / _--=> preempt-depth\n", prec, space);
4240 	seq_printf(m, "#                            %.*s||| / _-=> migrate-disable\n", prec, space);
4241 	seq_printf(m, "#                            %.*s|||| /     delay\n", prec, space);
4242 	seq_printf(m, "#           TASK-PID  %.*s CPU#  |||||  TIMESTAMP  FUNCTION\n", prec, "     TGID   ");
4243 	seq_printf(m, "#              | |    %.*s   |   |||||     |         |\n", prec, "       |    ");
4244 }
4245 
4246 void
4247 print_trace_header(struct seq_file *m, struct trace_iterator *iter)
4248 {
4249 	unsigned long sym_flags = (global_trace.trace_flags & TRACE_ITER_SYM_MASK);
4250 	struct array_buffer *buf = iter->array_buffer;
4251 	struct trace_array_cpu *data = per_cpu_ptr(buf->data, buf->cpu);
4252 	struct tracer *type = iter->trace;
4253 	unsigned long entries;
4254 	unsigned long total;
4255 	const char *name = type->name;
4256 
4257 	get_total_entries(buf, &total, &entries);
4258 
4259 	seq_printf(m, "# %s latency trace v1.1.5 on %s\n",
4260 		   name, init_utsname()->release);
4261 	seq_puts(m, "# -----------------------------------"
4262 		 "---------------------------------\n");
4263 	seq_printf(m, "# latency: %lu us, #%lu/%lu, CPU#%d |"
4264 		   " (M:%s VP:%d, KP:%d, SP:%d HP:%d",
4265 		   nsecs_to_usecs(data->saved_latency),
4266 		   entries,
4267 		   total,
4268 		   buf->cpu,
4269 		   preempt_model_none()      ? "server" :
4270 		   preempt_model_voluntary() ? "desktop" :
4271 		   preempt_model_full()      ? "preempt" :
4272 		   preempt_model_rt()        ? "preempt_rt" :
4273 		   "unknown",
4274 		   /* These are reserved for later use */
4275 		   0, 0, 0, 0);
4276 #ifdef CONFIG_SMP
4277 	seq_printf(m, " #P:%d)\n", num_online_cpus());
4278 #else
4279 	seq_puts(m, ")\n");
4280 #endif
4281 	seq_puts(m, "#    -----------------\n");
4282 	seq_printf(m, "#    | task: %.16s-%d "
4283 		   "(uid:%d nice:%ld policy:%ld rt_prio:%ld)\n",
4284 		   data->comm, data->pid,
4285 		   from_kuid_munged(seq_user_ns(m), data->uid), data->nice,
4286 		   data->policy, data->rt_priority);
4287 	seq_puts(m, "#    -----------------\n");
4288 
4289 	if (data->critical_start) {
4290 		seq_puts(m, "#  => started at: ");
4291 		seq_print_ip_sym(&iter->seq, data->critical_start, sym_flags);
4292 		trace_print_seq(m, &iter->seq);
4293 		seq_puts(m, "\n#  => ended at:   ");
4294 		seq_print_ip_sym(&iter->seq, data->critical_end, sym_flags);
4295 		trace_print_seq(m, &iter->seq);
4296 		seq_puts(m, "\n#\n");
4297 	}
4298 
4299 	seq_puts(m, "#\n");
4300 }
4301 
4302 static void test_cpu_buff_start(struct trace_iterator *iter)
4303 {
4304 	struct trace_seq *s = &iter->seq;
4305 	struct trace_array *tr = iter->tr;
4306 
4307 	if (!(tr->trace_flags & TRACE_ITER_ANNOTATE))
4308 		return;
4309 
4310 	if (!(iter->iter_flags & TRACE_FILE_ANNOTATE))
4311 		return;
4312 
4313 	if (cpumask_available(iter->started) &&
4314 	    cpumask_test_cpu(iter->cpu, iter->started))
4315 		return;
4316 
4317 	if (per_cpu_ptr(iter->array_buffer->data, iter->cpu)->skipped_entries)
4318 		return;
4319 
4320 	if (cpumask_available(iter->started))
4321 		cpumask_set_cpu(iter->cpu, iter->started);
4322 
4323 	/* Don't print started cpu buffer for the first entry of the trace */
4324 	if (iter->idx > 1)
4325 		trace_seq_printf(s, "##### CPU %u buffer started ####\n",
4326 				iter->cpu);
4327 }
4328 
4329 static enum print_line_t print_trace_fmt(struct trace_iterator *iter)
4330 {
4331 	struct trace_array *tr = iter->tr;
4332 	struct trace_seq *s = &iter->seq;
4333 	unsigned long sym_flags = (tr->trace_flags & TRACE_ITER_SYM_MASK);
4334 	struct trace_entry *entry;
4335 	struct trace_event *event;
4336 
4337 	entry = iter->ent;
4338 
4339 	test_cpu_buff_start(iter);
4340 
4341 	event = ftrace_find_event(entry->type);
4342 
4343 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4344 		if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4345 			trace_print_lat_context(iter);
4346 		else
4347 			trace_print_context(iter);
4348 	}
4349 
4350 	if (trace_seq_has_overflowed(s))
4351 		return TRACE_TYPE_PARTIAL_LINE;
4352 
4353 	if (event) {
4354 		if (tr->trace_flags & TRACE_ITER_FIELDS)
4355 			return print_event_fields(iter, event);
4356 		return event->funcs->trace(iter, sym_flags, event);
4357 	}
4358 
4359 	trace_seq_printf(s, "Unknown type %d\n", entry->type);
4360 
4361 	return trace_handle_return(s);
4362 }
4363 
4364 static enum print_line_t print_raw_fmt(struct trace_iterator *iter)
4365 {
4366 	struct trace_array *tr = iter->tr;
4367 	struct trace_seq *s = &iter->seq;
4368 	struct trace_entry *entry;
4369 	struct trace_event *event;
4370 
4371 	entry = iter->ent;
4372 
4373 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO)
4374 		trace_seq_printf(s, "%d %d %llu ",
4375 				 entry->pid, iter->cpu, iter->ts);
4376 
4377 	if (trace_seq_has_overflowed(s))
4378 		return TRACE_TYPE_PARTIAL_LINE;
4379 
4380 	event = ftrace_find_event(entry->type);
4381 	if (event)
4382 		return event->funcs->raw(iter, 0, event);
4383 
4384 	trace_seq_printf(s, "%d ?\n", entry->type);
4385 
4386 	return trace_handle_return(s);
4387 }
4388 
4389 static enum print_line_t print_hex_fmt(struct trace_iterator *iter)
4390 {
4391 	struct trace_array *tr = iter->tr;
4392 	struct trace_seq *s = &iter->seq;
4393 	unsigned char newline = '\n';
4394 	struct trace_entry *entry;
4395 	struct trace_event *event;
4396 
4397 	entry = iter->ent;
4398 
4399 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4400 		SEQ_PUT_HEX_FIELD(s, entry->pid);
4401 		SEQ_PUT_HEX_FIELD(s, iter->cpu);
4402 		SEQ_PUT_HEX_FIELD(s, iter->ts);
4403 		if (trace_seq_has_overflowed(s))
4404 			return TRACE_TYPE_PARTIAL_LINE;
4405 	}
4406 
4407 	event = ftrace_find_event(entry->type);
4408 	if (event) {
4409 		enum print_line_t ret = event->funcs->hex(iter, 0, event);
4410 		if (ret != TRACE_TYPE_HANDLED)
4411 			return ret;
4412 	}
4413 
4414 	SEQ_PUT_FIELD(s, newline);
4415 
4416 	return trace_handle_return(s);
4417 }
4418 
4419 static enum print_line_t print_bin_fmt(struct trace_iterator *iter)
4420 {
4421 	struct trace_array *tr = iter->tr;
4422 	struct trace_seq *s = &iter->seq;
4423 	struct trace_entry *entry;
4424 	struct trace_event *event;
4425 
4426 	entry = iter->ent;
4427 
4428 	if (tr->trace_flags & TRACE_ITER_CONTEXT_INFO) {
4429 		SEQ_PUT_FIELD(s, entry->pid);
4430 		SEQ_PUT_FIELD(s, iter->cpu);
4431 		SEQ_PUT_FIELD(s, iter->ts);
4432 		if (trace_seq_has_overflowed(s))
4433 			return TRACE_TYPE_PARTIAL_LINE;
4434 	}
4435 
4436 	event = ftrace_find_event(entry->type);
4437 	return event ? event->funcs->binary(iter, 0, event) :
4438 		TRACE_TYPE_HANDLED;
4439 }
4440 
4441 int trace_empty(struct trace_iterator *iter)
4442 {
4443 	struct ring_buffer_iter *buf_iter;
4444 	int cpu;
4445 
4446 	/* If we are looking at one CPU buffer, only check that one */
4447 	if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
4448 		cpu = iter->cpu_file;
4449 		buf_iter = trace_buffer_iter(iter, cpu);
4450 		if (buf_iter) {
4451 			if (!ring_buffer_iter_empty(buf_iter))
4452 				return 0;
4453 		} else {
4454 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4455 				return 0;
4456 		}
4457 		return 1;
4458 	}
4459 
4460 	for_each_tracing_cpu(cpu) {
4461 		buf_iter = trace_buffer_iter(iter, cpu);
4462 		if (buf_iter) {
4463 			if (!ring_buffer_iter_empty(buf_iter))
4464 				return 0;
4465 		} else {
4466 			if (!ring_buffer_empty_cpu(iter->array_buffer->buffer, cpu))
4467 				return 0;
4468 		}
4469 	}
4470 
4471 	return 1;
4472 }
4473 
4474 /*  Called with trace_event_read_lock() held. */
4475 enum print_line_t print_trace_line(struct trace_iterator *iter)
4476 {
4477 	struct trace_array *tr = iter->tr;
4478 	unsigned long trace_flags = tr->trace_flags;
4479 	enum print_line_t ret;
4480 
4481 	if (iter->lost_events) {
4482 		if (iter->lost_events == (unsigned long)-1)
4483 			trace_seq_printf(&iter->seq, "CPU:%d [LOST EVENTS]\n",
4484 					 iter->cpu);
4485 		else
4486 			trace_seq_printf(&iter->seq, "CPU:%d [LOST %lu EVENTS]\n",
4487 					 iter->cpu, iter->lost_events);
4488 		if (trace_seq_has_overflowed(&iter->seq))
4489 			return TRACE_TYPE_PARTIAL_LINE;
4490 	}
4491 
4492 	if (iter->trace && iter->trace->print_line) {
4493 		ret = iter->trace->print_line(iter);
4494 		if (ret != TRACE_TYPE_UNHANDLED)
4495 			return ret;
4496 	}
4497 
4498 	if (iter->ent->type == TRACE_BPUTS &&
4499 			trace_flags & TRACE_ITER_PRINTK &&
4500 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4501 		return trace_print_bputs_msg_only(iter);
4502 
4503 	if (iter->ent->type == TRACE_BPRINT &&
4504 			trace_flags & TRACE_ITER_PRINTK &&
4505 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4506 		return trace_print_bprintk_msg_only(iter);
4507 
4508 	if (iter->ent->type == TRACE_PRINT &&
4509 			trace_flags & TRACE_ITER_PRINTK &&
4510 			trace_flags & TRACE_ITER_PRINTK_MSGONLY)
4511 		return trace_print_printk_msg_only(iter);
4512 
4513 	if (trace_flags & TRACE_ITER_BIN)
4514 		return print_bin_fmt(iter);
4515 
4516 	if (trace_flags & TRACE_ITER_HEX)
4517 		return print_hex_fmt(iter);
4518 
4519 	if (trace_flags & TRACE_ITER_RAW)
4520 		return print_raw_fmt(iter);
4521 
4522 	return print_trace_fmt(iter);
4523 }
4524 
4525 void trace_latency_header(struct seq_file *m)
4526 {
4527 	struct trace_iterator *iter = m->private;
4528 	struct trace_array *tr = iter->tr;
4529 
4530 	/* print nothing if the buffers are empty */
4531 	if (trace_empty(iter))
4532 		return;
4533 
4534 	if (iter->iter_flags & TRACE_FILE_LAT_FMT)
4535 		print_trace_header(m, iter);
4536 
4537 	if (!(tr->trace_flags & TRACE_ITER_VERBOSE))
4538 		print_lat_help_header(m);
4539 }
4540 
4541 void trace_default_header(struct seq_file *m)
4542 {
4543 	struct trace_iterator *iter = m->private;
4544 	struct trace_array *tr = iter->tr;
4545 	unsigned long trace_flags = tr->trace_flags;
4546 
4547 	if (!(trace_flags & TRACE_ITER_CONTEXT_INFO))
4548 		return;
4549 
4550 	if (iter->iter_flags & TRACE_FILE_LAT_FMT) {
4551 		/* print nothing if the buffers are empty */
4552 		if (trace_empty(iter))
4553 			return;
4554 		print_trace_header(m, iter);
4555 		if (!(trace_flags & TRACE_ITER_VERBOSE))
4556 			print_lat_help_header(m);
4557 	} else {
4558 		if (!(trace_flags & TRACE_ITER_VERBOSE)) {
4559 			if (trace_flags & TRACE_ITER_IRQ_INFO)
4560 				print_func_help_header_irq(iter->array_buffer,
4561 							   m, trace_flags);
4562 			else
4563 				print_func_help_header(iter->array_buffer, m,
4564 						       trace_flags);
4565 		}
4566 	}
4567 }
4568 
4569 static void test_ftrace_alive(struct seq_file *m)
4570 {
4571 	if (!ftrace_is_dead())
4572 		return;
4573 	seq_puts(m, "# WARNING: FUNCTION TRACING IS CORRUPTED\n"
4574 		    "#          MAY BE MISSING FUNCTION EVENTS\n");
4575 }
4576 
4577 #ifdef CONFIG_TRACER_MAX_TRACE
4578 static void show_snapshot_main_help(struct seq_file *m)
4579 {
4580 	seq_puts(m, "# echo 0 > snapshot : Clears and frees snapshot buffer\n"
4581 		    "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4582 		    "#                      Takes a snapshot of the main buffer.\n"
4583 		    "# echo 2 > snapshot : Clears snapshot buffer (but does not allocate or free)\n"
4584 		    "#                      (Doesn't have to be '2' works with any number that\n"
4585 		    "#                       is not a '0' or '1')\n");
4586 }
4587 
4588 static void show_snapshot_percpu_help(struct seq_file *m)
4589 {
4590 	seq_puts(m, "# echo 0 > snapshot : Invalid for per_cpu snapshot file.\n");
4591 #ifdef CONFIG_RING_BUFFER_ALLOW_SWAP
4592 	seq_puts(m, "# echo 1 > snapshot : Allocates snapshot buffer, if not already allocated.\n"
4593 		    "#                      Takes a snapshot of the main buffer for this cpu.\n");
4594 #else
4595 	seq_puts(m, "# echo 1 > snapshot : Not supported with this kernel.\n"
4596 		    "#                     Must use main snapshot file to allocate.\n");
4597 #endif
4598 	seq_puts(m, "# echo 2 > snapshot : Clears this cpu's snapshot buffer (but does not allocate)\n"
4599 		    "#                      (Doesn't have to be '2' works with any number that\n"
4600 		    "#                       is not a '0' or '1')\n");
4601 }
4602 
4603 static void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter)
4604 {
4605 	if (iter->tr->allocated_snapshot)
4606 		seq_puts(m, "#\n# * Snapshot is allocated *\n#\n");
4607 	else
4608 		seq_puts(m, "#\n# * Snapshot is freed *\n#\n");
4609 
4610 	seq_puts(m, "# Snapshot commands:\n");
4611 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
4612 		show_snapshot_main_help(m);
4613 	else
4614 		show_snapshot_percpu_help(m);
4615 }
4616 #else
4617 /* Should never be called */
4618 static inline void print_snapshot_help(struct seq_file *m, struct trace_iterator *iter) { }
4619 #endif
4620 
4621 static int s_show(struct seq_file *m, void *v)
4622 {
4623 	struct trace_iterator *iter = v;
4624 	int ret;
4625 
4626 	if (iter->ent == NULL) {
4627 		if (iter->tr) {
4628 			seq_printf(m, "# tracer: %s\n", iter->trace->name);
4629 			seq_puts(m, "#\n");
4630 			test_ftrace_alive(m);
4631 		}
4632 		if (iter->snapshot && trace_empty(iter))
4633 			print_snapshot_help(m, iter);
4634 		else if (iter->trace && iter->trace->print_header)
4635 			iter->trace->print_header(m);
4636 		else
4637 			trace_default_header(m);
4638 
4639 	} else if (iter->leftover) {
4640 		/*
4641 		 * If we filled the seq_file buffer earlier, we
4642 		 * want to just show it now.
4643 		 */
4644 		ret = trace_print_seq(m, &iter->seq);
4645 
4646 		/* ret should this time be zero, but you never know */
4647 		iter->leftover = ret;
4648 
4649 	} else {
4650 		ret = print_trace_line(iter);
4651 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
4652 			iter->seq.full = 0;
4653 			trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
4654 		}
4655 		ret = trace_print_seq(m, &iter->seq);
4656 		/*
4657 		 * If we overflow the seq_file buffer, then it will
4658 		 * ask us for this data again at start up.
4659 		 * Use that instead.
4660 		 *  ret is 0 if seq_file write succeeded.
4661 		 *        -1 otherwise.
4662 		 */
4663 		iter->leftover = ret;
4664 	}
4665 
4666 	return 0;
4667 }
4668 
4669 /*
4670  * Should be used after trace_array_get(), trace_types_lock
4671  * ensures that i_cdev was already initialized.
4672  */
4673 static inline int tracing_get_cpu(struct inode *inode)
4674 {
4675 	if (inode->i_cdev) /* See trace_create_cpu_file() */
4676 		return (long)inode->i_cdev - 1;
4677 	return RING_BUFFER_ALL_CPUS;
4678 }
4679 
4680 static const struct seq_operations tracer_seq_ops = {
4681 	.start		= s_start,
4682 	.next		= s_next,
4683 	.stop		= s_stop,
4684 	.show		= s_show,
4685 };
4686 
4687 /*
4688  * Note, as iter itself can be allocated and freed in different
4689  * ways, this function is only used to free its content, and not
4690  * the iterator itself. The only requirement to all the allocations
4691  * is that it must zero all fields (kzalloc), as freeing works with
4692  * ethier allocated content or NULL.
4693  */
4694 static void free_trace_iter_content(struct trace_iterator *iter)
4695 {
4696 	/* The fmt is either NULL, allocated or points to static_fmt_buf */
4697 	if (iter->fmt != static_fmt_buf)
4698 		kfree(iter->fmt);
4699 
4700 	kfree(iter->temp);
4701 	kfree(iter->buffer_iter);
4702 	mutex_destroy(&iter->mutex);
4703 	free_cpumask_var(iter->started);
4704 }
4705 
4706 static struct trace_iterator *
4707 __tracing_open(struct inode *inode, struct file *file, bool snapshot)
4708 {
4709 	struct trace_array *tr = inode->i_private;
4710 	struct trace_iterator *iter;
4711 	int cpu;
4712 
4713 	if (tracing_disabled)
4714 		return ERR_PTR(-ENODEV);
4715 
4716 	iter = __seq_open_private(file, &tracer_seq_ops, sizeof(*iter));
4717 	if (!iter)
4718 		return ERR_PTR(-ENOMEM);
4719 
4720 	iter->buffer_iter = kcalloc(nr_cpu_ids, sizeof(*iter->buffer_iter),
4721 				    GFP_KERNEL);
4722 	if (!iter->buffer_iter)
4723 		goto release;
4724 
4725 	/*
4726 	 * trace_find_next_entry() may need to save off iter->ent.
4727 	 * It will place it into the iter->temp buffer. As most
4728 	 * events are less than 128, allocate a buffer of that size.
4729 	 * If one is greater, then trace_find_next_entry() will
4730 	 * allocate a new buffer to adjust for the bigger iter->ent.
4731 	 * It's not critical if it fails to get allocated here.
4732 	 */
4733 	iter->temp = kmalloc(128, GFP_KERNEL);
4734 	if (iter->temp)
4735 		iter->temp_size = 128;
4736 
4737 	/*
4738 	 * trace_event_printf() may need to modify given format
4739 	 * string to replace %p with %px so that it shows real address
4740 	 * instead of hash value. However, that is only for the event
4741 	 * tracing, other tracer may not need. Defer the allocation
4742 	 * until it is needed.
4743 	 */
4744 	iter->fmt = NULL;
4745 	iter->fmt_size = 0;
4746 
4747 	mutex_lock(&trace_types_lock);
4748 	iter->trace = tr->current_trace;
4749 
4750 	if (!zalloc_cpumask_var(&iter->started, GFP_KERNEL))
4751 		goto fail;
4752 
4753 	iter->tr = tr;
4754 
4755 #ifdef CONFIG_TRACER_MAX_TRACE
4756 	/* Currently only the top directory has a snapshot */
4757 	if (tr->current_trace->print_max || snapshot)
4758 		iter->array_buffer = &tr->max_buffer;
4759 	else
4760 #endif
4761 		iter->array_buffer = &tr->array_buffer;
4762 	iter->snapshot = snapshot;
4763 	iter->pos = -1;
4764 	iter->cpu_file = tracing_get_cpu(inode);
4765 	mutex_init(&iter->mutex);
4766 
4767 	/* Notify the tracer early; before we stop tracing. */
4768 	if (iter->trace->open)
4769 		iter->trace->open(iter);
4770 
4771 	/* Annotate start of buffers if we had overruns */
4772 	if (ring_buffer_overruns(iter->array_buffer->buffer))
4773 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
4774 
4775 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
4776 	if (trace_clocks[tr->clock_id].in_ns)
4777 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
4778 
4779 	/*
4780 	 * If pause-on-trace is enabled, then stop the trace while
4781 	 * dumping, unless this is the "snapshot" file
4782 	 */
4783 	if (!iter->snapshot && (tr->trace_flags & TRACE_ITER_PAUSE_ON_TRACE))
4784 		tracing_stop_tr(tr);
4785 
4786 	if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
4787 		for_each_tracing_cpu(cpu) {
4788 			iter->buffer_iter[cpu] =
4789 				ring_buffer_read_prepare(iter->array_buffer->buffer,
4790 							 cpu, GFP_KERNEL);
4791 		}
4792 		ring_buffer_read_prepare_sync();
4793 		for_each_tracing_cpu(cpu) {
4794 			ring_buffer_read_start(iter->buffer_iter[cpu]);
4795 			tracing_iter_reset(iter, cpu);
4796 		}
4797 	} else {
4798 		cpu = iter->cpu_file;
4799 		iter->buffer_iter[cpu] =
4800 			ring_buffer_read_prepare(iter->array_buffer->buffer,
4801 						 cpu, GFP_KERNEL);
4802 		ring_buffer_read_prepare_sync();
4803 		ring_buffer_read_start(iter->buffer_iter[cpu]);
4804 		tracing_iter_reset(iter, cpu);
4805 	}
4806 
4807 	mutex_unlock(&trace_types_lock);
4808 
4809 	return iter;
4810 
4811  fail:
4812 	mutex_unlock(&trace_types_lock);
4813 	free_trace_iter_content(iter);
4814 release:
4815 	seq_release_private(inode, file);
4816 	return ERR_PTR(-ENOMEM);
4817 }
4818 
4819 int tracing_open_generic(struct inode *inode, struct file *filp)
4820 {
4821 	int ret;
4822 
4823 	ret = tracing_check_open_get_tr(NULL);
4824 	if (ret)
4825 		return ret;
4826 
4827 	filp->private_data = inode->i_private;
4828 	return 0;
4829 }
4830 
4831 bool tracing_is_disabled(void)
4832 {
4833 	return (tracing_disabled) ? true: false;
4834 }
4835 
4836 /*
4837  * Open and update trace_array ref count.
4838  * Must have the current trace_array passed to it.
4839  */
4840 int tracing_open_generic_tr(struct inode *inode, struct file *filp)
4841 {
4842 	struct trace_array *tr = inode->i_private;
4843 	int ret;
4844 
4845 	ret = tracing_check_open_get_tr(tr);
4846 	if (ret)
4847 		return ret;
4848 
4849 	filp->private_data = inode->i_private;
4850 
4851 	return 0;
4852 }
4853 
4854 /*
4855  * The private pointer of the inode is the trace_event_file.
4856  * Update the tr ref count associated to it.
4857  */
4858 int tracing_open_file_tr(struct inode *inode, struct file *filp)
4859 {
4860 	struct trace_event_file *file = inode->i_private;
4861 	int ret;
4862 
4863 	ret = tracing_check_open_get_tr(file->tr);
4864 	if (ret)
4865 		return ret;
4866 
4867 	mutex_lock(&event_mutex);
4868 
4869 	/* Fail if the file is marked for removal */
4870 	if (file->flags & EVENT_FILE_FL_FREED) {
4871 		trace_array_put(file->tr);
4872 		ret = -ENODEV;
4873 	} else {
4874 		event_file_get(file);
4875 	}
4876 
4877 	mutex_unlock(&event_mutex);
4878 	if (ret)
4879 		return ret;
4880 
4881 	filp->private_data = inode->i_private;
4882 
4883 	return 0;
4884 }
4885 
4886 int tracing_release_file_tr(struct inode *inode, struct file *filp)
4887 {
4888 	struct trace_event_file *file = inode->i_private;
4889 
4890 	trace_array_put(file->tr);
4891 	event_file_put(file);
4892 
4893 	return 0;
4894 }
4895 
4896 int tracing_single_release_file_tr(struct inode *inode, struct file *filp)
4897 {
4898 	tracing_release_file_tr(inode, filp);
4899 	return single_release(inode, filp);
4900 }
4901 
4902 static int tracing_mark_open(struct inode *inode, struct file *filp)
4903 {
4904 	stream_open(inode, filp);
4905 	return tracing_open_generic_tr(inode, filp);
4906 }
4907 
4908 static int tracing_release(struct inode *inode, struct file *file)
4909 {
4910 	struct trace_array *tr = inode->i_private;
4911 	struct seq_file *m = file->private_data;
4912 	struct trace_iterator *iter;
4913 	int cpu;
4914 
4915 	if (!(file->f_mode & FMODE_READ)) {
4916 		trace_array_put(tr);
4917 		return 0;
4918 	}
4919 
4920 	/* Writes do not use seq_file */
4921 	iter = m->private;
4922 	mutex_lock(&trace_types_lock);
4923 
4924 	for_each_tracing_cpu(cpu) {
4925 		if (iter->buffer_iter[cpu])
4926 			ring_buffer_read_finish(iter->buffer_iter[cpu]);
4927 	}
4928 
4929 	if (iter->trace && iter->trace->close)
4930 		iter->trace->close(iter);
4931 
4932 	if (!iter->snapshot && tr->stop_count)
4933 		/* reenable tracing if it was previously enabled */
4934 		tracing_start_tr(tr);
4935 
4936 	__trace_array_put(tr);
4937 
4938 	mutex_unlock(&trace_types_lock);
4939 
4940 	free_trace_iter_content(iter);
4941 	seq_release_private(inode, file);
4942 
4943 	return 0;
4944 }
4945 
4946 int tracing_release_generic_tr(struct inode *inode, struct file *file)
4947 {
4948 	struct trace_array *tr = inode->i_private;
4949 
4950 	trace_array_put(tr);
4951 	return 0;
4952 }
4953 
4954 static int tracing_single_release_tr(struct inode *inode, struct file *file)
4955 {
4956 	struct trace_array *tr = inode->i_private;
4957 
4958 	trace_array_put(tr);
4959 
4960 	return single_release(inode, file);
4961 }
4962 
4963 static int tracing_open(struct inode *inode, struct file *file)
4964 {
4965 	struct trace_array *tr = inode->i_private;
4966 	struct trace_iterator *iter;
4967 	int ret;
4968 
4969 	ret = tracing_check_open_get_tr(tr);
4970 	if (ret)
4971 		return ret;
4972 
4973 	/* If this file was open for write, then erase contents */
4974 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC)) {
4975 		int cpu = tracing_get_cpu(inode);
4976 		struct array_buffer *trace_buf = &tr->array_buffer;
4977 
4978 #ifdef CONFIG_TRACER_MAX_TRACE
4979 		if (tr->current_trace->print_max)
4980 			trace_buf = &tr->max_buffer;
4981 #endif
4982 
4983 		if (cpu == RING_BUFFER_ALL_CPUS)
4984 			tracing_reset_online_cpus(trace_buf);
4985 		else
4986 			tracing_reset_cpu(trace_buf, cpu);
4987 	}
4988 
4989 	if (file->f_mode & FMODE_READ) {
4990 		iter = __tracing_open(inode, file, false);
4991 		if (IS_ERR(iter))
4992 			ret = PTR_ERR(iter);
4993 		else if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
4994 			iter->iter_flags |= TRACE_FILE_LAT_FMT;
4995 	}
4996 
4997 	if (ret < 0)
4998 		trace_array_put(tr);
4999 
5000 	return ret;
5001 }
5002 
5003 /*
5004  * Some tracers are not suitable for instance buffers.
5005  * A tracer is always available for the global array (toplevel)
5006  * or if it explicitly states that it is.
5007  */
5008 static bool
5009 trace_ok_for_array(struct tracer *t, struct trace_array *tr)
5010 {
5011 #ifdef CONFIG_TRACER_SNAPSHOT
5012 	/* arrays with mapped buffer range do not have snapshots */
5013 	if (tr->range_addr_start && t->use_max_tr)
5014 		return false;
5015 #endif
5016 	return (tr->flags & TRACE_ARRAY_FL_GLOBAL) || t->allow_instances;
5017 }
5018 
5019 /* Find the next tracer that this trace array may use */
5020 static struct tracer *
5021 get_tracer_for_array(struct trace_array *tr, struct tracer *t)
5022 {
5023 	while (t && !trace_ok_for_array(t, tr))
5024 		t = t->next;
5025 
5026 	return t;
5027 }
5028 
5029 static void *
5030 t_next(struct seq_file *m, void *v, loff_t *pos)
5031 {
5032 	struct trace_array *tr = m->private;
5033 	struct tracer *t = v;
5034 
5035 	(*pos)++;
5036 
5037 	if (t)
5038 		t = get_tracer_for_array(tr, t->next);
5039 
5040 	return t;
5041 }
5042 
5043 static void *t_start(struct seq_file *m, loff_t *pos)
5044 {
5045 	struct trace_array *tr = m->private;
5046 	struct tracer *t;
5047 	loff_t l = 0;
5048 
5049 	mutex_lock(&trace_types_lock);
5050 
5051 	t = get_tracer_for_array(tr, trace_types);
5052 	for (; t && l < *pos; t = t_next(m, t, &l))
5053 			;
5054 
5055 	return t;
5056 }
5057 
5058 static void t_stop(struct seq_file *m, void *p)
5059 {
5060 	mutex_unlock(&trace_types_lock);
5061 }
5062 
5063 static int t_show(struct seq_file *m, void *v)
5064 {
5065 	struct tracer *t = v;
5066 
5067 	if (!t)
5068 		return 0;
5069 
5070 	seq_puts(m, t->name);
5071 	if (t->next)
5072 		seq_putc(m, ' ');
5073 	else
5074 		seq_putc(m, '\n');
5075 
5076 	return 0;
5077 }
5078 
5079 static const struct seq_operations show_traces_seq_ops = {
5080 	.start		= t_start,
5081 	.next		= t_next,
5082 	.stop		= t_stop,
5083 	.show		= t_show,
5084 };
5085 
5086 static int show_traces_open(struct inode *inode, struct file *file)
5087 {
5088 	struct trace_array *tr = inode->i_private;
5089 	struct seq_file *m;
5090 	int ret;
5091 
5092 	ret = tracing_check_open_get_tr(tr);
5093 	if (ret)
5094 		return ret;
5095 
5096 	ret = seq_open(file, &show_traces_seq_ops);
5097 	if (ret) {
5098 		trace_array_put(tr);
5099 		return ret;
5100 	}
5101 
5102 	m = file->private_data;
5103 	m->private = tr;
5104 
5105 	return 0;
5106 }
5107 
5108 static int tracing_seq_release(struct inode *inode, struct file *file)
5109 {
5110 	struct trace_array *tr = inode->i_private;
5111 
5112 	trace_array_put(tr);
5113 	return seq_release(inode, file);
5114 }
5115 
5116 static ssize_t
5117 tracing_write_stub(struct file *filp, const char __user *ubuf,
5118 		   size_t count, loff_t *ppos)
5119 {
5120 	return count;
5121 }
5122 
5123 loff_t tracing_lseek(struct file *file, loff_t offset, int whence)
5124 {
5125 	int ret;
5126 
5127 	if (file->f_mode & FMODE_READ)
5128 		ret = seq_lseek(file, offset, whence);
5129 	else
5130 		file->f_pos = ret = 0;
5131 
5132 	return ret;
5133 }
5134 
5135 static const struct file_operations tracing_fops = {
5136 	.open		= tracing_open,
5137 	.read		= seq_read,
5138 	.read_iter	= seq_read_iter,
5139 	.splice_read	= copy_splice_read,
5140 	.write		= tracing_write_stub,
5141 	.llseek		= tracing_lseek,
5142 	.release	= tracing_release,
5143 };
5144 
5145 static const struct file_operations show_traces_fops = {
5146 	.open		= show_traces_open,
5147 	.read		= seq_read,
5148 	.llseek		= seq_lseek,
5149 	.release	= tracing_seq_release,
5150 };
5151 
5152 static ssize_t
5153 tracing_cpumask_read(struct file *filp, char __user *ubuf,
5154 		     size_t count, loff_t *ppos)
5155 {
5156 	struct trace_array *tr = file_inode(filp)->i_private;
5157 	char *mask_str;
5158 	int len;
5159 
5160 	len = snprintf(NULL, 0, "%*pb\n",
5161 		       cpumask_pr_args(tr->tracing_cpumask)) + 1;
5162 	mask_str = kmalloc(len, GFP_KERNEL);
5163 	if (!mask_str)
5164 		return -ENOMEM;
5165 
5166 	len = snprintf(mask_str, len, "%*pb\n",
5167 		       cpumask_pr_args(tr->tracing_cpumask));
5168 	if (len >= count) {
5169 		count = -EINVAL;
5170 		goto out_err;
5171 	}
5172 	count = simple_read_from_buffer(ubuf, count, ppos, mask_str, len);
5173 
5174 out_err:
5175 	kfree(mask_str);
5176 
5177 	return count;
5178 }
5179 
5180 int tracing_set_cpumask(struct trace_array *tr,
5181 			cpumask_var_t tracing_cpumask_new)
5182 {
5183 	int cpu;
5184 
5185 	if (!tr)
5186 		return -EINVAL;
5187 
5188 	local_irq_disable();
5189 	arch_spin_lock(&tr->max_lock);
5190 	for_each_tracing_cpu(cpu) {
5191 		/*
5192 		 * Increase/decrease the disabled counter if we are
5193 		 * about to flip a bit in the cpumask:
5194 		 */
5195 		if (cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5196 				!cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5197 			atomic_inc(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5198 			ring_buffer_record_disable_cpu(tr->array_buffer.buffer, cpu);
5199 #ifdef CONFIG_TRACER_MAX_TRACE
5200 			ring_buffer_record_disable_cpu(tr->max_buffer.buffer, cpu);
5201 #endif
5202 		}
5203 		if (!cpumask_test_cpu(cpu, tr->tracing_cpumask) &&
5204 				cpumask_test_cpu(cpu, tracing_cpumask_new)) {
5205 			atomic_dec(&per_cpu_ptr(tr->array_buffer.data, cpu)->disabled);
5206 			ring_buffer_record_enable_cpu(tr->array_buffer.buffer, cpu);
5207 #ifdef CONFIG_TRACER_MAX_TRACE
5208 			ring_buffer_record_enable_cpu(tr->max_buffer.buffer, cpu);
5209 #endif
5210 		}
5211 	}
5212 	arch_spin_unlock(&tr->max_lock);
5213 	local_irq_enable();
5214 
5215 	cpumask_copy(tr->tracing_cpumask, tracing_cpumask_new);
5216 
5217 	return 0;
5218 }
5219 
5220 static ssize_t
5221 tracing_cpumask_write(struct file *filp, const char __user *ubuf,
5222 		      size_t count, loff_t *ppos)
5223 {
5224 	struct trace_array *tr = file_inode(filp)->i_private;
5225 	cpumask_var_t tracing_cpumask_new;
5226 	int err;
5227 
5228 	if (!zalloc_cpumask_var(&tracing_cpumask_new, GFP_KERNEL))
5229 		return -ENOMEM;
5230 
5231 	err = cpumask_parse_user(ubuf, count, tracing_cpumask_new);
5232 	if (err)
5233 		goto err_free;
5234 
5235 	err = tracing_set_cpumask(tr, tracing_cpumask_new);
5236 	if (err)
5237 		goto err_free;
5238 
5239 	free_cpumask_var(tracing_cpumask_new);
5240 
5241 	return count;
5242 
5243 err_free:
5244 	free_cpumask_var(tracing_cpumask_new);
5245 
5246 	return err;
5247 }
5248 
5249 static const struct file_operations tracing_cpumask_fops = {
5250 	.open		= tracing_open_generic_tr,
5251 	.read		= tracing_cpumask_read,
5252 	.write		= tracing_cpumask_write,
5253 	.release	= tracing_release_generic_tr,
5254 	.llseek		= generic_file_llseek,
5255 };
5256 
5257 static int tracing_trace_options_show(struct seq_file *m, void *v)
5258 {
5259 	struct tracer_opt *trace_opts;
5260 	struct trace_array *tr = m->private;
5261 	u32 tracer_flags;
5262 	int i;
5263 
5264 	mutex_lock(&trace_types_lock);
5265 	tracer_flags = tr->current_trace->flags->val;
5266 	trace_opts = tr->current_trace->flags->opts;
5267 
5268 	for (i = 0; trace_options[i]; i++) {
5269 		if (tr->trace_flags & (1 << i))
5270 			seq_printf(m, "%s\n", trace_options[i]);
5271 		else
5272 			seq_printf(m, "no%s\n", trace_options[i]);
5273 	}
5274 
5275 	for (i = 0; trace_opts[i].name; i++) {
5276 		if (tracer_flags & trace_opts[i].bit)
5277 			seq_printf(m, "%s\n", trace_opts[i].name);
5278 		else
5279 			seq_printf(m, "no%s\n", trace_opts[i].name);
5280 	}
5281 	mutex_unlock(&trace_types_lock);
5282 
5283 	return 0;
5284 }
5285 
5286 static int __set_tracer_option(struct trace_array *tr,
5287 			       struct tracer_flags *tracer_flags,
5288 			       struct tracer_opt *opts, int neg)
5289 {
5290 	struct tracer *trace = tracer_flags->trace;
5291 	int ret;
5292 
5293 	ret = trace->set_flag(tr, tracer_flags->val, opts->bit, !neg);
5294 	if (ret)
5295 		return ret;
5296 
5297 	if (neg)
5298 		tracer_flags->val &= ~opts->bit;
5299 	else
5300 		tracer_flags->val |= opts->bit;
5301 	return 0;
5302 }
5303 
5304 /* Try to assign a tracer specific option */
5305 static int set_tracer_option(struct trace_array *tr, char *cmp, int neg)
5306 {
5307 	struct tracer *trace = tr->current_trace;
5308 	struct tracer_flags *tracer_flags = trace->flags;
5309 	struct tracer_opt *opts = NULL;
5310 	int i;
5311 
5312 	for (i = 0; tracer_flags->opts[i].name; i++) {
5313 		opts = &tracer_flags->opts[i];
5314 
5315 		if (strcmp(cmp, opts->name) == 0)
5316 			return __set_tracer_option(tr, trace->flags, opts, neg);
5317 	}
5318 
5319 	return -EINVAL;
5320 }
5321 
5322 /* Some tracers require overwrite to stay enabled */
5323 int trace_keep_overwrite(struct tracer *tracer, u32 mask, int set)
5324 {
5325 	if (tracer->enabled && (mask & TRACE_ITER_OVERWRITE) && !set)
5326 		return -1;
5327 
5328 	return 0;
5329 }
5330 
5331 int set_tracer_flag(struct trace_array *tr, unsigned int mask, int enabled)
5332 {
5333 	if ((mask == TRACE_ITER_RECORD_TGID) ||
5334 	    (mask == TRACE_ITER_RECORD_CMD) ||
5335 	    (mask == TRACE_ITER_TRACE_PRINTK))
5336 		lockdep_assert_held(&event_mutex);
5337 
5338 	/* do nothing if flag is already set */
5339 	if (!!(tr->trace_flags & mask) == !!enabled)
5340 		return 0;
5341 
5342 	/* Give the tracer a chance to approve the change */
5343 	if (tr->current_trace->flag_changed)
5344 		if (tr->current_trace->flag_changed(tr, mask, !!enabled))
5345 			return -EINVAL;
5346 
5347 	if (mask == TRACE_ITER_TRACE_PRINTK) {
5348 		if (enabled) {
5349 			update_printk_trace(tr);
5350 		} else {
5351 			/*
5352 			 * The global_trace cannot clear this.
5353 			 * It's flag only gets cleared if another instance sets it.
5354 			 */
5355 			if (printk_trace == &global_trace)
5356 				return -EINVAL;
5357 			/*
5358 			 * An instance must always have it set.
5359 			 * by default, that's the global_trace instane.
5360 			 */
5361 			if (printk_trace == tr)
5362 				update_printk_trace(&global_trace);
5363 		}
5364 	}
5365 
5366 	if (enabled)
5367 		tr->trace_flags |= mask;
5368 	else
5369 		tr->trace_flags &= ~mask;
5370 
5371 	if (mask == TRACE_ITER_RECORD_CMD)
5372 		trace_event_enable_cmd_record(enabled);
5373 
5374 	if (mask == TRACE_ITER_RECORD_TGID) {
5375 
5376 		if (trace_alloc_tgid_map() < 0) {
5377 			tr->trace_flags &= ~TRACE_ITER_RECORD_TGID;
5378 			return -ENOMEM;
5379 		}
5380 
5381 		trace_event_enable_tgid_record(enabled);
5382 	}
5383 
5384 	if (mask == TRACE_ITER_EVENT_FORK)
5385 		trace_event_follow_fork(tr, enabled);
5386 
5387 	if (mask == TRACE_ITER_FUNC_FORK)
5388 		ftrace_pid_follow_fork(tr, enabled);
5389 
5390 	if (mask == TRACE_ITER_OVERWRITE) {
5391 		ring_buffer_change_overwrite(tr->array_buffer.buffer, enabled);
5392 #ifdef CONFIG_TRACER_MAX_TRACE
5393 		ring_buffer_change_overwrite(tr->max_buffer.buffer, enabled);
5394 #endif
5395 	}
5396 
5397 	if (mask == TRACE_ITER_PRINTK) {
5398 		trace_printk_start_stop_comm(enabled);
5399 		trace_printk_control(enabled);
5400 	}
5401 
5402 	return 0;
5403 }
5404 
5405 int trace_set_options(struct trace_array *tr, char *option)
5406 {
5407 	char *cmp;
5408 	int neg = 0;
5409 	int ret;
5410 	size_t orig_len = strlen(option);
5411 	int len;
5412 
5413 	cmp = strstrip(option);
5414 
5415 	len = str_has_prefix(cmp, "no");
5416 	if (len)
5417 		neg = 1;
5418 
5419 	cmp += len;
5420 
5421 	mutex_lock(&event_mutex);
5422 	mutex_lock(&trace_types_lock);
5423 
5424 	ret = match_string(trace_options, -1, cmp);
5425 	/* If no option could be set, test the specific tracer options */
5426 	if (ret < 0)
5427 		ret = set_tracer_option(tr, cmp, neg);
5428 	else
5429 		ret = set_tracer_flag(tr, 1 << ret, !neg);
5430 
5431 	mutex_unlock(&trace_types_lock);
5432 	mutex_unlock(&event_mutex);
5433 
5434 	/*
5435 	 * If the first trailing whitespace is replaced with '\0' by strstrip,
5436 	 * turn it back into a space.
5437 	 */
5438 	if (orig_len > strlen(option))
5439 		option[strlen(option)] = ' ';
5440 
5441 	return ret;
5442 }
5443 
5444 static void __init apply_trace_boot_options(void)
5445 {
5446 	char *buf = trace_boot_options_buf;
5447 	char *option;
5448 
5449 	while (true) {
5450 		option = strsep(&buf, ",");
5451 
5452 		if (!option)
5453 			break;
5454 
5455 		if (*option)
5456 			trace_set_options(&global_trace, option);
5457 
5458 		/* Put back the comma to allow this to be called again */
5459 		if (buf)
5460 			*(buf - 1) = ',';
5461 	}
5462 }
5463 
5464 static ssize_t
5465 tracing_trace_options_write(struct file *filp, const char __user *ubuf,
5466 			size_t cnt, loff_t *ppos)
5467 {
5468 	struct seq_file *m = filp->private_data;
5469 	struct trace_array *tr = m->private;
5470 	char buf[64];
5471 	int ret;
5472 
5473 	if (cnt >= sizeof(buf))
5474 		return -EINVAL;
5475 
5476 	if (copy_from_user(buf, ubuf, cnt))
5477 		return -EFAULT;
5478 
5479 	buf[cnt] = 0;
5480 
5481 	ret = trace_set_options(tr, buf);
5482 	if (ret < 0)
5483 		return ret;
5484 
5485 	*ppos += cnt;
5486 
5487 	return cnt;
5488 }
5489 
5490 static int tracing_trace_options_open(struct inode *inode, struct file *file)
5491 {
5492 	struct trace_array *tr = inode->i_private;
5493 	int ret;
5494 
5495 	ret = tracing_check_open_get_tr(tr);
5496 	if (ret)
5497 		return ret;
5498 
5499 	ret = single_open(file, tracing_trace_options_show, inode->i_private);
5500 	if (ret < 0)
5501 		trace_array_put(tr);
5502 
5503 	return ret;
5504 }
5505 
5506 static const struct file_operations tracing_iter_fops = {
5507 	.open		= tracing_trace_options_open,
5508 	.read		= seq_read,
5509 	.llseek		= seq_lseek,
5510 	.release	= tracing_single_release_tr,
5511 	.write		= tracing_trace_options_write,
5512 };
5513 
5514 static const char readme_msg[] =
5515 	"tracing mini-HOWTO:\n\n"
5516 	"By default tracefs removes all OTH file permission bits.\n"
5517 	"When mounting tracefs an optional group id can be specified\n"
5518 	"which adds the group to every directory and file in tracefs:\n\n"
5519 	"\t e.g. mount -t tracefs [-o [gid=<gid>]] nodev /sys/kernel/tracing\n\n"
5520 	"# echo 0 > tracing_on : quick way to disable tracing\n"
5521 	"# echo 1 > tracing_on : quick way to re-enable tracing\n\n"
5522 	" Important files:\n"
5523 	"  trace\t\t\t- The static contents of the buffer\n"
5524 	"\t\t\t  To clear the buffer write into this file: echo > trace\n"
5525 	"  trace_pipe\t\t- A consuming read to see the contents of the buffer\n"
5526 	"  current_tracer\t- function and latency tracers\n"
5527 	"  available_tracers\t- list of configured tracers for current_tracer\n"
5528 	"  error_log\t- error log for failed commands (that support it)\n"
5529 	"  buffer_size_kb\t- view and modify size of per cpu buffer\n"
5530 	"  buffer_total_size_kb  - view total size of all cpu buffers\n\n"
5531 	"  trace_clock\t\t- change the clock used to order events\n"
5532 	"       local:   Per cpu clock but may not be synced across CPUs\n"
5533 	"      global:   Synced across CPUs but slows tracing down.\n"
5534 	"     counter:   Not a clock, but just an increment\n"
5535 	"      uptime:   Jiffy counter from time of boot\n"
5536 	"        perf:   Same clock that perf events use\n"
5537 #ifdef CONFIG_X86_64
5538 	"     x86-tsc:   TSC cycle counter\n"
5539 #endif
5540 	"\n  timestamp_mode\t- view the mode used to timestamp events\n"
5541 	"       delta:   Delta difference against a buffer-wide timestamp\n"
5542 	"    absolute:   Absolute (standalone) timestamp\n"
5543 	"\n  trace_marker\t\t- Writes into this file writes into the kernel buffer\n"
5544 	"\n  trace_marker_raw\t\t- Writes into this file writes binary data into the kernel buffer\n"
5545 	"  tracing_cpumask\t- Limit which CPUs to trace\n"
5546 	"  instances\t\t- Make sub-buffers with: mkdir instances/foo\n"
5547 	"\t\t\t  Remove sub-buffer with rmdir\n"
5548 	"  trace_options\t\t- Set format or modify how tracing happens\n"
5549 	"\t\t\t  Disable an option by prefixing 'no' to the\n"
5550 	"\t\t\t  option name\n"
5551 	"  saved_cmdlines_size\t- echo command number in here to store comm-pid list\n"
5552 #ifdef CONFIG_DYNAMIC_FTRACE
5553 	"\n  available_filter_functions - list of functions that can be filtered on\n"
5554 	"  set_ftrace_filter\t- echo function name in here to only trace these\n"
5555 	"\t\t\t  functions\n"
5556 	"\t     accepts: func_full_name or glob-matching-pattern\n"
5557 	"\t     modules: Can select a group via module\n"
5558 	"\t      Format: :mod:<module-name>\n"
5559 	"\t     example: echo :mod:ext3 > set_ftrace_filter\n"
5560 	"\t    triggers: a command to perform when function is hit\n"
5561 	"\t      Format: <function>:<trigger>[:count]\n"
5562 	"\t     trigger: traceon, traceoff\n"
5563 	"\t\t      enable_event:<system>:<event>\n"
5564 	"\t\t      disable_event:<system>:<event>\n"
5565 #ifdef CONFIG_STACKTRACE
5566 	"\t\t      stacktrace\n"
5567 #endif
5568 #ifdef CONFIG_TRACER_SNAPSHOT
5569 	"\t\t      snapshot\n"
5570 #endif
5571 	"\t\t      dump\n"
5572 	"\t\t      cpudump\n"
5573 	"\t     example: echo do_fault:traceoff > set_ftrace_filter\n"
5574 	"\t              echo do_trap:traceoff:3 > set_ftrace_filter\n"
5575 	"\t     The first one will disable tracing every time do_fault is hit\n"
5576 	"\t     The second will disable tracing at most 3 times when do_trap is hit\n"
5577 	"\t       The first time do trap is hit and it disables tracing, the\n"
5578 	"\t       counter will decrement to 2. If tracing is already disabled,\n"
5579 	"\t       the counter will not decrement. It only decrements when the\n"
5580 	"\t       trigger did work\n"
5581 	"\t     To remove trigger without count:\n"
5582 	"\t       echo '!<function>:<trigger> > set_ftrace_filter\n"
5583 	"\t     To remove trigger with a count:\n"
5584 	"\t       echo '!<function>:<trigger>:0 > set_ftrace_filter\n"
5585 	"  set_ftrace_notrace\t- echo function name in here to never trace.\n"
5586 	"\t    accepts: func_full_name, *func_end, func_begin*, *func_middle*\n"
5587 	"\t    modules: Can select a group via module command :mod:\n"
5588 	"\t    Does not accept triggers\n"
5589 #endif /* CONFIG_DYNAMIC_FTRACE */
5590 #ifdef CONFIG_FUNCTION_TRACER
5591 	"  set_ftrace_pid\t- Write pid(s) to only function trace those pids\n"
5592 	"\t\t    (function)\n"
5593 	"  set_ftrace_notrace_pid\t- Write pid(s) to not function trace those pids\n"
5594 	"\t\t    (function)\n"
5595 #endif
5596 #ifdef CONFIG_FUNCTION_GRAPH_TRACER
5597 	"  set_graph_function\t- Trace the nested calls of a function (function_graph)\n"
5598 	"  set_graph_notrace\t- Do not trace the nested calls of a function (function_graph)\n"
5599 	"  max_graph_depth\t- Trace a limited depth of nested calls (0 is unlimited)\n"
5600 #endif
5601 #ifdef CONFIG_TRACER_SNAPSHOT
5602 	"\n  snapshot\t\t- Like 'trace' but shows the content of the static\n"
5603 	"\t\t\t  snapshot buffer. Read the contents for more\n"
5604 	"\t\t\t  information\n"
5605 #endif
5606 #ifdef CONFIG_STACK_TRACER
5607 	"  stack_trace\t\t- Shows the max stack trace when active\n"
5608 	"  stack_max_size\t- Shows current max stack size that was traced\n"
5609 	"\t\t\t  Write into this file to reset the max size (trigger a\n"
5610 	"\t\t\t  new trace)\n"
5611 #ifdef CONFIG_DYNAMIC_FTRACE
5612 	"  stack_trace_filter\t- Like set_ftrace_filter but limits what stack_trace\n"
5613 	"\t\t\t  traces\n"
5614 #endif
5615 #endif /* CONFIG_STACK_TRACER */
5616 #ifdef CONFIG_DYNAMIC_EVENTS
5617 	"  dynamic_events\t\t- Create/append/remove/show the generic dynamic events\n"
5618 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5619 #endif
5620 #ifdef CONFIG_KPROBE_EVENTS
5621 	"  kprobe_events\t\t- Create/append/remove/show the kernel dynamic events\n"
5622 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5623 #endif
5624 #ifdef CONFIG_UPROBE_EVENTS
5625 	"  uprobe_events\t\t- Create/append/remove/show the userspace dynamic events\n"
5626 	"\t\t\t  Write into this file to define/undefine new trace events.\n"
5627 #endif
5628 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS) || \
5629     defined(CONFIG_FPROBE_EVENTS)
5630 	"\t  accepts: event-definitions (one definition per line)\n"
5631 #if defined(CONFIG_KPROBE_EVENTS) || defined(CONFIG_UPROBE_EVENTS)
5632 	"\t   Format: p[:[<group>/][<event>]] <place> [<args>]\n"
5633 	"\t           r[maxactive][:[<group>/][<event>]] <place> [<args>]\n"
5634 #endif
5635 #ifdef CONFIG_FPROBE_EVENTS
5636 	"\t           f[:[<group>/][<event>]] <func-name>[%return] [<args>]\n"
5637 	"\t           t[:[<group>/][<event>]] <tracepoint> [<args>]\n"
5638 #endif
5639 #ifdef CONFIG_HIST_TRIGGERS
5640 	"\t           s:[synthetic/]<event> <field> [<field>]\n"
5641 #endif
5642 	"\t           e[:[<group>/][<event>]] <attached-group>.<attached-event> [<args>] [if <filter>]\n"
5643 	"\t           -:[<group>/][<event>]\n"
5644 #ifdef CONFIG_KPROBE_EVENTS
5645 	"\t    place: [<module>:]<symbol>[+<offset>]|<memaddr>\n"
5646   "place (kretprobe): [<module>:]<symbol>[+<offset>]%return|<memaddr>\n"
5647 #endif
5648 #ifdef CONFIG_UPROBE_EVENTS
5649   "   place (uprobe): <path>:<offset>[%return][(ref_ctr_offset)]\n"
5650 #endif
5651 	"\t     args: <name>=fetcharg[:type]\n"
5652 	"\t fetcharg: (%<register>|$<efield>), @<address>, @<symbol>[+|-<offset>],\n"
5653 #ifdef CONFIG_HAVE_FUNCTION_ARG_ACCESS_API
5654 	"\t           $stack<index>, $stack, $retval, $comm, $arg<N>,\n"
5655 #ifdef CONFIG_PROBE_EVENTS_BTF_ARGS
5656 	"\t           <argname>[->field[->field|.field...]],\n"
5657 #endif
5658 #else
5659 	"\t           $stack<index>, $stack, $retval, $comm,\n"
5660 #endif
5661 	"\t           +|-[u]<offset>(<fetcharg>), \\imm-value, \\\"imm-string\"\n"
5662 	"\t     kernel return probes support: $retval, $arg<N>, $comm\n"
5663 	"\t     type: s8/16/32/64, u8/16/32/64, x8/16/32/64, char, string, symbol,\n"
5664 	"\t           b<bit-width>@<bit-offset>/<container-size>, ustring,\n"
5665 	"\t           symstr, %pd/%pD, <type>\\[<array-size>\\]\n"
5666 #ifdef CONFIG_HIST_TRIGGERS
5667 	"\t    field: <stype> <name>;\n"
5668 	"\t    stype: u8/u16/u32/u64, s8/s16/s32/s64, pid_t,\n"
5669 	"\t           [unsigned] char/int/long\n"
5670 #endif
5671 	"\t    efield: For event probes ('e' types), the field is on of the fields\n"
5672 	"\t            of the <attached-group>/<attached-event>.\n"
5673 #endif
5674 	"  events/\t\t- Directory containing all trace event subsystems:\n"
5675 	"      enable\t\t- Write 0/1 to enable/disable tracing of all events\n"
5676 	"  events/<system>/\t- Directory containing all trace events for <system>:\n"
5677 	"      enable\t\t- Write 0/1 to enable/disable tracing of all <system>\n"
5678 	"\t\t\t  events\n"
5679 	"      filter\t\t- If set, only events passing filter are traced\n"
5680 	"  events/<system>/<event>/\t- Directory containing control files for\n"
5681 	"\t\t\t  <event>:\n"
5682 	"      enable\t\t- Write 0/1 to enable/disable tracing of <event>\n"
5683 	"      filter\t\t- If set, only events passing filter are traced\n"
5684 	"      trigger\t\t- If set, a command to perform when event is hit\n"
5685 	"\t    Format: <trigger>[:count][if <filter>]\n"
5686 	"\t   trigger: traceon, traceoff\n"
5687 	"\t            enable_event:<system>:<event>\n"
5688 	"\t            disable_event:<system>:<event>\n"
5689 #ifdef CONFIG_HIST_TRIGGERS
5690 	"\t            enable_hist:<system>:<event>\n"
5691 	"\t            disable_hist:<system>:<event>\n"
5692 #endif
5693 #ifdef CONFIG_STACKTRACE
5694 	"\t\t    stacktrace\n"
5695 #endif
5696 #ifdef CONFIG_TRACER_SNAPSHOT
5697 	"\t\t    snapshot\n"
5698 #endif
5699 #ifdef CONFIG_HIST_TRIGGERS
5700 	"\t\t    hist (see below)\n"
5701 #endif
5702 	"\t   example: echo traceoff > events/block/block_unplug/trigger\n"
5703 	"\t            echo traceoff:3 > events/block/block_unplug/trigger\n"
5704 	"\t            echo 'enable_event:kmem:kmalloc:3 if nr_rq > 1' > \\\n"
5705 	"\t                  events/block/block_unplug/trigger\n"
5706 	"\t   The first disables tracing every time block_unplug is hit.\n"
5707 	"\t   The second disables tracing the first 3 times block_unplug is hit.\n"
5708 	"\t   The third enables the kmalloc event the first 3 times block_unplug\n"
5709 	"\t     is hit and has value of greater than 1 for the 'nr_rq' event field.\n"
5710 	"\t   Like function triggers, the counter is only decremented if it\n"
5711 	"\t    enabled or disabled tracing.\n"
5712 	"\t   To remove a trigger without a count:\n"
5713 	"\t     echo '!<trigger> > <system>/<event>/trigger\n"
5714 	"\t   To remove a trigger with a count:\n"
5715 	"\t     echo '!<trigger>:0 > <system>/<event>/trigger\n"
5716 	"\t   Filters can be ignored when removing a trigger.\n"
5717 #ifdef CONFIG_HIST_TRIGGERS
5718 	"      hist trigger\t- If set, event hits are aggregated into a hash table\n"
5719 	"\t    Format: hist:keys=<field1[,field2,...]>\n"
5720 	"\t            [:<var1>=<field|var_ref|numeric_literal>[,<var2>=...]]\n"
5721 	"\t            [:values=<field1[,field2,...]>]\n"
5722 	"\t            [:sort=<field1[,field2,...]>]\n"
5723 	"\t            [:size=#entries]\n"
5724 	"\t            [:pause][:continue][:clear]\n"
5725 	"\t            [:name=histname1]\n"
5726 	"\t            [:nohitcount]\n"
5727 	"\t            [:<handler>.<action>]\n"
5728 	"\t            [if <filter>]\n\n"
5729 	"\t    Note, special fields can be used as well:\n"
5730 	"\t            common_timestamp - to record current timestamp\n"
5731 	"\t            common_cpu - to record the CPU the event happened on\n"
5732 	"\n"
5733 	"\t    A hist trigger variable can be:\n"
5734 	"\t        - a reference to a field e.g. x=current_timestamp,\n"
5735 	"\t        - a reference to another variable e.g. y=$x,\n"
5736 	"\t        - a numeric literal: e.g. ms_per_sec=1000,\n"
5737 	"\t        - an arithmetic expression: e.g. time_secs=current_timestamp/1000\n"
5738 	"\n"
5739 	"\t    hist trigger arithmetic expressions support addition(+), subtraction(-),\n"
5740 	"\t    multiplication(*) and division(/) operators. An operand can be either a\n"
5741 	"\t    variable reference, field or numeric literal.\n"
5742 	"\n"
5743 	"\t    When a matching event is hit, an entry is added to a hash\n"
5744 	"\t    table using the key(s) and value(s) named, and the value of a\n"
5745 	"\t    sum called 'hitcount' is incremented.  Keys and values\n"
5746 	"\t    correspond to fields in the event's format description.  Keys\n"
5747 	"\t    can be any field, or the special string 'common_stacktrace'.\n"
5748 	"\t    Compound keys consisting of up to two fields can be specified\n"
5749 	"\t    by the 'keys' keyword.  Values must correspond to numeric\n"
5750 	"\t    fields.  Sort keys consisting of up to two fields can be\n"
5751 	"\t    specified using the 'sort' keyword.  The sort direction can\n"
5752 	"\t    be modified by appending '.descending' or '.ascending' to a\n"
5753 	"\t    sort field.  The 'size' parameter can be used to specify more\n"
5754 	"\t    or fewer than the default 2048 entries for the hashtable size.\n"
5755 	"\t    If a hist trigger is given a name using the 'name' parameter,\n"
5756 	"\t    its histogram data will be shared with other triggers of the\n"
5757 	"\t    same name, and trigger hits will update this common data.\n\n"
5758 	"\t    Reading the 'hist' file for the event will dump the hash\n"
5759 	"\t    table in its entirety to stdout.  If there are multiple hist\n"
5760 	"\t    triggers attached to an event, there will be a table for each\n"
5761 	"\t    trigger in the output.  The table displayed for a named\n"
5762 	"\t    trigger will be the same as any other instance having the\n"
5763 	"\t    same name.  The default format used to display a given field\n"
5764 	"\t    can be modified by appending any of the following modifiers\n"
5765 	"\t    to the field name, as applicable:\n\n"
5766 	"\t            .hex        display a number as a hex value\n"
5767 	"\t            .sym        display an address as a symbol\n"
5768 	"\t            .sym-offset display an address as a symbol and offset\n"
5769 	"\t            .execname   display a common_pid as a program name\n"
5770 	"\t            .syscall    display a syscall id as a syscall name\n"
5771 	"\t            .log2       display log2 value rather than raw number\n"
5772 	"\t            .buckets=size  display values in groups of size rather than raw number\n"
5773 	"\t            .usecs      display a common_timestamp in microseconds\n"
5774 	"\t            .percent    display a number of percentage value\n"
5775 	"\t            .graph      display a bar-graph of a value\n\n"
5776 	"\t    The 'pause' parameter can be used to pause an existing hist\n"
5777 	"\t    trigger or to start a hist trigger but not log any events\n"
5778 	"\t    until told to do so.  'continue' can be used to start or\n"
5779 	"\t    restart a paused hist trigger.\n\n"
5780 	"\t    The 'clear' parameter will clear the contents of a running\n"
5781 	"\t    hist trigger and leave its current paused/active state\n"
5782 	"\t    unchanged.\n\n"
5783 	"\t    The 'nohitcount' (or NOHC) parameter will suppress display of\n"
5784 	"\t    raw hitcount in the histogram.\n\n"
5785 	"\t    The enable_hist and disable_hist triggers can be used to\n"
5786 	"\t    have one event conditionally start and stop another event's\n"
5787 	"\t    already-attached hist trigger.  The syntax is analogous to\n"
5788 	"\t    the enable_event and disable_event triggers.\n\n"
5789 	"\t    Hist trigger handlers and actions are executed whenever a\n"
5790 	"\t    a histogram entry is added or updated.  They take the form:\n\n"
5791 	"\t        <handler>.<action>\n\n"
5792 	"\t    The available handlers are:\n\n"
5793 	"\t        onmatch(matching.event)  - invoke on addition or update\n"
5794 	"\t        onmax(var)               - invoke if var exceeds current max\n"
5795 	"\t        onchange(var)            - invoke action if var changes\n\n"
5796 	"\t    The available actions are:\n\n"
5797 	"\t        trace(<synthetic_event>,param list)  - generate synthetic event\n"
5798 	"\t        save(field,...)                      - save current event fields\n"
5799 #ifdef CONFIG_TRACER_SNAPSHOT
5800 	"\t        snapshot()                           - snapshot the trace buffer\n\n"
5801 #endif
5802 #ifdef CONFIG_SYNTH_EVENTS
5803 	"  events/synthetic_events\t- Create/append/remove/show synthetic events\n"
5804 	"\t  Write into this file to define/undefine new synthetic events.\n"
5805 	"\t     example: echo 'myevent u64 lat; char name[]; long[] stack' >> synthetic_events\n"
5806 #endif
5807 #endif
5808 ;
5809 
5810 static ssize_t
5811 tracing_readme_read(struct file *filp, char __user *ubuf,
5812 		       size_t cnt, loff_t *ppos)
5813 {
5814 	return simple_read_from_buffer(ubuf, cnt, ppos,
5815 					readme_msg, strlen(readme_msg));
5816 }
5817 
5818 static const struct file_operations tracing_readme_fops = {
5819 	.open		= tracing_open_generic,
5820 	.read		= tracing_readme_read,
5821 	.llseek		= generic_file_llseek,
5822 };
5823 
5824 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
5825 static union trace_eval_map_item *
5826 update_eval_map(union trace_eval_map_item *ptr)
5827 {
5828 	if (!ptr->map.eval_string) {
5829 		if (ptr->tail.next) {
5830 			ptr = ptr->tail.next;
5831 			/* Set ptr to the next real item (skip head) */
5832 			ptr++;
5833 		} else
5834 			return NULL;
5835 	}
5836 	return ptr;
5837 }
5838 
5839 static void *eval_map_next(struct seq_file *m, void *v, loff_t *pos)
5840 {
5841 	union trace_eval_map_item *ptr = v;
5842 
5843 	/*
5844 	 * Paranoid! If ptr points to end, we don't want to increment past it.
5845 	 * This really should never happen.
5846 	 */
5847 	(*pos)++;
5848 	ptr = update_eval_map(ptr);
5849 	if (WARN_ON_ONCE(!ptr))
5850 		return NULL;
5851 
5852 	ptr++;
5853 	ptr = update_eval_map(ptr);
5854 
5855 	return ptr;
5856 }
5857 
5858 static void *eval_map_start(struct seq_file *m, loff_t *pos)
5859 {
5860 	union trace_eval_map_item *v;
5861 	loff_t l = 0;
5862 
5863 	mutex_lock(&trace_eval_mutex);
5864 
5865 	v = trace_eval_maps;
5866 	if (v)
5867 		v++;
5868 
5869 	while (v && l < *pos) {
5870 		v = eval_map_next(m, v, &l);
5871 	}
5872 
5873 	return v;
5874 }
5875 
5876 static void eval_map_stop(struct seq_file *m, void *v)
5877 {
5878 	mutex_unlock(&trace_eval_mutex);
5879 }
5880 
5881 static int eval_map_show(struct seq_file *m, void *v)
5882 {
5883 	union trace_eval_map_item *ptr = v;
5884 
5885 	seq_printf(m, "%s %ld (%s)\n",
5886 		   ptr->map.eval_string, ptr->map.eval_value,
5887 		   ptr->map.system);
5888 
5889 	return 0;
5890 }
5891 
5892 static const struct seq_operations tracing_eval_map_seq_ops = {
5893 	.start		= eval_map_start,
5894 	.next		= eval_map_next,
5895 	.stop		= eval_map_stop,
5896 	.show		= eval_map_show,
5897 };
5898 
5899 static int tracing_eval_map_open(struct inode *inode, struct file *filp)
5900 {
5901 	int ret;
5902 
5903 	ret = tracing_check_open_get_tr(NULL);
5904 	if (ret)
5905 		return ret;
5906 
5907 	return seq_open(filp, &tracing_eval_map_seq_ops);
5908 }
5909 
5910 static const struct file_operations tracing_eval_map_fops = {
5911 	.open		= tracing_eval_map_open,
5912 	.read		= seq_read,
5913 	.llseek		= seq_lseek,
5914 	.release	= seq_release,
5915 };
5916 
5917 static inline union trace_eval_map_item *
5918 trace_eval_jmp_to_tail(union trace_eval_map_item *ptr)
5919 {
5920 	/* Return tail of array given the head */
5921 	return ptr + ptr->head.length + 1;
5922 }
5923 
5924 static void
5925 trace_insert_eval_map_file(struct module *mod, struct trace_eval_map **start,
5926 			   int len)
5927 {
5928 	struct trace_eval_map **stop;
5929 	struct trace_eval_map **map;
5930 	union trace_eval_map_item *map_array;
5931 	union trace_eval_map_item *ptr;
5932 
5933 	stop = start + len;
5934 
5935 	/*
5936 	 * The trace_eval_maps contains the map plus a head and tail item,
5937 	 * where the head holds the module and length of array, and the
5938 	 * tail holds a pointer to the next list.
5939 	 */
5940 	map_array = kmalloc_array(len + 2, sizeof(*map_array), GFP_KERNEL);
5941 	if (!map_array) {
5942 		pr_warn("Unable to allocate trace eval mapping\n");
5943 		return;
5944 	}
5945 
5946 	mutex_lock(&trace_eval_mutex);
5947 
5948 	if (!trace_eval_maps)
5949 		trace_eval_maps = map_array;
5950 	else {
5951 		ptr = trace_eval_maps;
5952 		for (;;) {
5953 			ptr = trace_eval_jmp_to_tail(ptr);
5954 			if (!ptr->tail.next)
5955 				break;
5956 			ptr = ptr->tail.next;
5957 
5958 		}
5959 		ptr->tail.next = map_array;
5960 	}
5961 	map_array->head.mod = mod;
5962 	map_array->head.length = len;
5963 	map_array++;
5964 
5965 	for (map = start; (unsigned long)map < (unsigned long)stop; map++) {
5966 		map_array->map = **map;
5967 		map_array++;
5968 	}
5969 	memset(map_array, 0, sizeof(*map_array));
5970 
5971 	mutex_unlock(&trace_eval_mutex);
5972 }
5973 
5974 static void trace_create_eval_file(struct dentry *d_tracer)
5975 {
5976 	trace_create_file("eval_map", TRACE_MODE_READ, d_tracer,
5977 			  NULL, &tracing_eval_map_fops);
5978 }
5979 
5980 #else /* CONFIG_TRACE_EVAL_MAP_FILE */
5981 static inline void trace_create_eval_file(struct dentry *d_tracer) { }
5982 static inline void trace_insert_eval_map_file(struct module *mod,
5983 			      struct trace_eval_map **start, int len) { }
5984 #endif /* !CONFIG_TRACE_EVAL_MAP_FILE */
5985 
5986 static void trace_insert_eval_map(struct module *mod,
5987 				  struct trace_eval_map **start, int len)
5988 {
5989 	struct trace_eval_map **map;
5990 
5991 	if (len <= 0)
5992 		return;
5993 
5994 	map = start;
5995 
5996 	trace_event_eval_update(map, len);
5997 
5998 	trace_insert_eval_map_file(mod, start, len);
5999 }
6000 
6001 static ssize_t
6002 tracing_set_trace_read(struct file *filp, char __user *ubuf,
6003 		       size_t cnt, loff_t *ppos)
6004 {
6005 	struct trace_array *tr = filp->private_data;
6006 	char buf[MAX_TRACER_SIZE+2];
6007 	int r;
6008 
6009 	mutex_lock(&trace_types_lock);
6010 	r = sprintf(buf, "%s\n", tr->current_trace->name);
6011 	mutex_unlock(&trace_types_lock);
6012 
6013 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6014 }
6015 
6016 int tracer_init(struct tracer *t, struct trace_array *tr)
6017 {
6018 	tracing_reset_online_cpus(&tr->array_buffer);
6019 	return t->init(tr);
6020 }
6021 
6022 static void set_buffer_entries(struct array_buffer *buf, unsigned long val)
6023 {
6024 	int cpu;
6025 
6026 	for_each_tracing_cpu(cpu)
6027 		per_cpu_ptr(buf->data, cpu)->entries = val;
6028 }
6029 
6030 static void update_buffer_entries(struct array_buffer *buf, int cpu)
6031 {
6032 	if (cpu == RING_BUFFER_ALL_CPUS) {
6033 		set_buffer_entries(buf, ring_buffer_size(buf->buffer, 0));
6034 	} else {
6035 		per_cpu_ptr(buf->data, cpu)->entries = ring_buffer_size(buf->buffer, cpu);
6036 	}
6037 }
6038 
6039 #ifdef CONFIG_TRACER_MAX_TRACE
6040 /* resize @tr's buffer to the size of @size_tr's entries */
6041 static int resize_buffer_duplicate_size(struct array_buffer *trace_buf,
6042 					struct array_buffer *size_buf, int cpu_id)
6043 {
6044 	int cpu, ret = 0;
6045 
6046 	if (cpu_id == RING_BUFFER_ALL_CPUS) {
6047 		for_each_tracing_cpu(cpu) {
6048 			ret = ring_buffer_resize(trace_buf->buffer,
6049 				 per_cpu_ptr(size_buf->data, cpu)->entries, cpu);
6050 			if (ret < 0)
6051 				break;
6052 			per_cpu_ptr(trace_buf->data, cpu)->entries =
6053 				per_cpu_ptr(size_buf->data, cpu)->entries;
6054 		}
6055 	} else {
6056 		ret = ring_buffer_resize(trace_buf->buffer,
6057 				 per_cpu_ptr(size_buf->data, cpu_id)->entries, cpu_id);
6058 		if (ret == 0)
6059 			per_cpu_ptr(trace_buf->data, cpu_id)->entries =
6060 				per_cpu_ptr(size_buf->data, cpu_id)->entries;
6061 	}
6062 
6063 	return ret;
6064 }
6065 #endif /* CONFIG_TRACER_MAX_TRACE */
6066 
6067 static int __tracing_resize_ring_buffer(struct trace_array *tr,
6068 					unsigned long size, int cpu)
6069 {
6070 	int ret;
6071 
6072 	/*
6073 	 * If kernel or user changes the size of the ring buffer
6074 	 * we use the size that was given, and we can forget about
6075 	 * expanding it later.
6076 	 */
6077 	trace_set_ring_buffer_expanded(tr);
6078 
6079 	/* May be called before buffers are initialized */
6080 	if (!tr->array_buffer.buffer)
6081 		return 0;
6082 
6083 	/* Do not allow tracing while resizing ring buffer */
6084 	tracing_stop_tr(tr);
6085 
6086 	ret = ring_buffer_resize(tr->array_buffer.buffer, size, cpu);
6087 	if (ret < 0)
6088 		goto out_start;
6089 
6090 #ifdef CONFIG_TRACER_MAX_TRACE
6091 	if (!tr->allocated_snapshot)
6092 		goto out;
6093 
6094 	ret = ring_buffer_resize(tr->max_buffer.buffer, size, cpu);
6095 	if (ret < 0) {
6096 		int r = resize_buffer_duplicate_size(&tr->array_buffer,
6097 						     &tr->array_buffer, cpu);
6098 		if (r < 0) {
6099 			/*
6100 			 * AARGH! We are left with different
6101 			 * size max buffer!!!!
6102 			 * The max buffer is our "snapshot" buffer.
6103 			 * When a tracer needs a snapshot (one of the
6104 			 * latency tracers), it swaps the max buffer
6105 			 * with the saved snap shot. We succeeded to
6106 			 * update the size of the main buffer, but failed to
6107 			 * update the size of the max buffer. But when we tried
6108 			 * to reset the main buffer to the original size, we
6109 			 * failed there too. This is very unlikely to
6110 			 * happen, but if it does, warn and kill all
6111 			 * tracing.
6112 			 */
6113 			WARN_ON(1);
6114 			tracing_disabled = 1;
6115 		}
6116 		goto out_start;
6117 	}
6118 
6119 	update_buffer_entries(&tr->max_buffer, cpu);
6120 
6121  out:
6122 #endif /* CONFIG_TRACER_MAX_TRACE */
6123 
6124 	update_buffer_entries(&tr->array_buffer, cpu);
6125  out_start:
6126 	tracing_start_tr(tr);
6127 	return ret;
6128 }
6129 
6130 ssize_t tracing_resize_ring_buffer(struct trace_array *tr,
6131 				  unsigned long size, int cpu_id)
6132 {
6133 	int ret;
6134 
6135 	mutex_lock(&trace_types_lock);
6136 
6137 	if (cpu_id != RING_BUFFER_ALL_CPUS) {
6138 		/* make sure, this cpu is enabled in the mask */
6139 		if (!cpumask_test_cpu(cpu_id, tracing_buffer_mask)) {
6140 			ret = -EINVAL;
6141 			goto out;
6142 		}
6143 	}
6144 
6145 	ret = __tracing_resize_ring_buffer(tr, size, cpu_id);
6146 	if (ret < 0)
6147 		ret = -ENOMEM;
6148 
6149 out:
6150 	mutex_unlock(&trace_types_lock);
6151 
6152 	return ret;
6153 }
6154 
6155 static void update_last_data(struct trace_array *tr)
6156 {
6157 	if (!tr->text_delta && !tr->data_delta)
6158 		return;
6159 
6160 	/*
6161 	 * Need to clear all CPU buffers as there cannot be events
6162 	 * from the previous boot mixed with events with this boot
6163 	 * as that will cause a confusing trace. Need to clear all
6164 	 * CPU buffers, even for those that may currently be offline.
6165 	 */
6166 	tracing_reset_all_cpus(&tr->array_buffer);
6167 
6168 	/* Using current data now */
6169 	tr->text_delta = 0;
6170 	tr->data_delta = 0;
6171 }
6172 
6173 /**
6174  * tracing_update_buffers - used by tracing facility to expand ring buffers
6175  * @tr: The tracing instance
6176  *
6177  * To save on memory when the tracing is never used on a system with it
6178  * configured in. The ring buffers are set to a minimum size. But once
6179  * a user starts to use the tracing facility, then they need to grow
6180  * to their default size.
6181  *
6182  * This function is to be called when a tracer is about to be used.
6183  */
6184 int tracing_update_buffers(struct trace_array *tr)
6185 {
6186 	int ret = 0;
6187 
6188 	mutex_lock(&trace_types_lock);
6189 
6190 	update_last_data(tr);
6191 
6192 	if (!tr->ring_buffer_expanded)
6193 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6194 						RING_BUFFER_ALL_CPUS);
6195 	mutex_unlock(&trace_types_lock);
6196 
6197 	return ret;
6198 }
6199 
6200 struct trace_option_dentry;
6201 
6202 static void
6203 create_trace_option_files(struct trace_array *tr, struct tracer *tracer);
6204 
6205 /*
6206  * Used to clear out the tracer before deletion of an instance.
6207  * Must have trace_types_lock held.
6208  */
6209 static void tracing_set_nop(struct trace_array *tr)
6210 {
6211 	if (tr->current_trace == &nop_trace)
6212 		return;
6213 
6214 	tr->current_trace->enabled--;
6215 
6216 	if (tr->current_trace->reset)
6217 		tr->current_trace->reset(tr);
6218 
6219 	tr->current_trace = &nop_trace;
6220 }
6221 
6222 static bool tracer_options_updated;
6223 
6224 static void add_tracer_options(struct trace_array *tr, struct tracer *t)
6225 {
6226 	/* Only enable if the directory has been created already. */
6227 	if (!tr->dir)
6228 		return;
6229 
6230 	/* Only create trace option files after update_tracer_options finish */
6231 	if (!tracer_options_updated)
6232 		return;
6233 
6234 	create_trace_option_files(tr, t);
6235 }
6236 
6237 int tracing_set_tracer(struct trace_array *tr, const char *buf)
6238 {
6239 	struct tracer *t;
6240 #ifdef CONFIG_TRACER_MAX_TRACE
6241 	bool had_max_tr;
6242 #endif
6243 	int ret = 0;
6244 
6245 	mutex_lock(&trace_types_lock);
6246 
6247 	update_last_data(tr);
6248 
6249 	if (!tr->ring_buffer_expanded) {
6250 		ret = __tracing_resize_ring_buffer(tr, trace_buf_size,
6251 						RING_BUFFER_ALL_CPUS);
6252 		if (ret < 0)
6253 			goto out;
6254 		ret = 0;
6255 	}
6256 
6257 	for (t = trace_types; t; t = t->next) {
6258 		if (strcmp(t->name, buf) == 0)
6259 			break;
6260 	}
6261 	if (!t) {
6262 		ret = -EINVAL;
6263 		goto out;
6264 	}
6265 	if (t == tr->current_trace)
6266 		goto out;
6267 
6268 #ifdef CONFIG_TRACER_SNAPSHOT
6269 	if (t->use_max_tr) {
6270 		local_irq_disable();
6271 		arch_spin_lock(&tr->max_lock);
6272 		if (tr->cond_snapshot)
6273 			ret = -EBUSY;
6274 		arch_spin_unlock(&tr->max_lock);
6275 		local_irq_enable();
6276 		if (ret)
6277 			goto out;
6278 	}
6279 #endif
6280 	/* Some tracers won't work on kernel command line */
6281 	if (system_state < SYSTEM_RUNNING && t->noboot) {
6282 		pr_warn("Tracer '%s' is not allowed on command line, ignored\n",
6283 			t->name);
6284 		goto out;
6285 	}
6286 
6287 	/* Some tracers are only allowed for the top level buffer */
6288 	if (!trace_ok_for_array(t, tr)) {
6289 		ret = -EINVAL;
6290 		goto out;
6291 	}
6292 
6293 	/* If trace pipe files are being read, we can't change the tracer */
6294 	if (tr->trace_ref) {
6295 		ret = -EBUSY;
6296 		goto out;
6297 	}
6298 
6299 	trace_branch_disable();
6300 
6301 	tr->current_trace->enabled--;
6302 
6303 	if (tr->current_trace->reset)
6304 		tr->current_trace->reset(tr);
6305 
6306 #ifdef CONFIG_TRACER_MAX_TRACE
6307 	had_max_tr = tr->current_trace->use_max_tr;
6308 
6309 	/* Current trace needs to be nop_trace before synchronize_rcu */
6310 	tr->current_trace = &nop_trace;
6311 
6312 	if (had_max_tr && !t->use_max_tr) {
6313 		/*
6314 		 * We need to make sure that the update_max_tr sees that
6315 		 * current_trace changed to nop_trace to keep it from
6316 		 * swapping the buffers after we resize it.
6317 		 * The update_max_tr is called from interrupts disabled
6318 		 * so a synchronized_sched() is sufficient.
6319 		 */
6320 		synchronize_rcu();
6321 		free_snapshot(tr);
6322 		tracing_disarm_snapshot(tr);
6323 	}
6324 
6325 	if (!had_max_tr && t->use_max_tr) {
6326 		ret = tracing_arm_snapshot_locked(tr);
6327 		if (ret)
6328 			goto out;
6329 	}
6330 #else
6331 	tr->current_trace = &nop_trace;
6332 #endif
6333 
6334 	if (t->init) {
6335 		ret = tracer_init(t, tr);
6336 		if (ret) {
6337 #ifdef CONFIG_TRACER_MAX_TRACE
6338 			if (t->use_max_tr)
6339 				tracing_disarm_snapshot(tr);
6340 #endif
6341 			goto out;
6342 		}
6343 	}
6344 
6345 	tr->current_trace = t;
6346 	tr->current_trace->enabled++;
6347 	trace_branch_enable(tr);
6348  out:
6349 	mutex_unlock(&trace_types_lock);
6350 
6351 	return ret;
6352 }
6353 
6354 static ssize_t
6355 tracing_set_trace_write(struct file *filp, const char __user *ubuf,
6356 			size_t cnt, loff_t *ppos)
6357 {
6358 	struct trace_array *tr = filp->private_data;
6359 	char buf[MAX_TRACER_SIZE+1];
6360 	char *name;
6361 	size_t ret;
6362 	int err;
6363 
6364 	ret = cnt;
6365 
6366 	if (cnt > MAX_TRACER_SIZE)
6367 		cnt = MAX_TRACER_SIZE;
6368 
6369 	if (copy_from_user(buf, ubuf, cnt))
6370 		return -EFAULT;
6371 
6372 	buf[cnt] = 0;
6373 
6374 	name = strim(buf);
6375 
6376 	err = tracing_set_tracer(tr, name);
6377 	if (err)
6378 		return err;
6379 
6380 	*ppos += ret;
6381 
6382 	return ret;
6383 }
6384 
6385 static ssize_t
6386 tracing_nsecs_read(unsigned long *ptr, char __user *ubuf,
6387 		   size_t cnt, loff_t *ppos)
6388 {
6389 	char buf[64];
6390 	int r;
6391 
6392 	r = snprintf(buf, sizeof(buf), "%ld\n",
6393 		     *ptr == (unsigned long)-1 ? -1 : nsecs_to_usecs(*ptr));
6394 	if (r > sizeof(buf))
6395 		r = sizeof(buf);
6396 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6397 }
6398 
6399 static ssize_t
6400 tracing_nsecs_write(unsigned long *ptr, const char __user *ubuf,
6401 		    size_t cnt, loff_t *ppos)
6402 {
6403 	unsigned long val;
6404 	int ret;
6405 
6406 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6407 	if (ret)
6408 		return ret;
6409 
6410 	*ptr = val * 1000;
6411 
6412 	return cnt;
6413 }
6414 
6415 static ssize_t
6416 tracing_thresh_read(struct file *filp, char __user *ubuf,
6417 		    size_t cnt, loff_t *ppos)
6418 {
6419 	return tracing_nsecs_read(&tracing_thresh, ubuf, cnt, ppos);
6420 }
6421 
6422 static ssize_t
6423 tracing_thresh_write(struct file *filp, const char __user *ubuf,
6424 		     size_t cnt, loff_t *ppos)
6425 {
6426 	struct trace_array *tr = filp->private_data;
6427 	int ret;
6428 
6429 	mutex_lock(&trace_types_lock);
6430 	ret = tracing_nsecs_write(&tracing_thresh, ubuf, cnt, ppos);
6431 	if (ret < 0)
6432 		goto out;
6433 
6434 	if (tr->current_trace->update_thresh) {
6435 		ret = tr->current_trace->update_thresh(tr);
6436 		if (ret < 0)
6437 			goto out;
6438 	}
6439 
6440 	ret = cnt;
6441 out:
6442 	mutex_unlock(&trace_types_lock);
6443 
6444 	return ret;
6445 }
6446 
6447 #ifdef CONFIG_TRACER_MAX_TRACE
6448 
6449 static ssize_t
6450 tracing_max_lat_read(struct file *filp, char __user *ubuf,
6451 		     size_t cnt, loff_t *ppos)
6452 {
6453 	struct trace_array *tr = filp->private_data;
6454 
6455 	return tracing_nsecs_read(&tr->max_latency, ubuf, cnt, ppos);
6456 }
6457 
6458 static ssize_t
6459 tracing_max_lat_write(struct file *filp, const char __user *ubuf,
6460 		      size_t cnt, loff_t *ppos)
6461 {
6462 	struct trace_array *tr = filp->private_data;
6463 
6464 	return tracing_nsecs_write(&tr->max_latency, ubuf, cnt, ppos);
6465 }
6466 
6467 #endif
6468 
6469 static int open_pipe_on_cpu(struct trace_array *tr, int cpu)
6470 {
6471 	if (cpu == RING_BUFFER_ALL_CPUS) {
6472 		if (cpumask_empty(tr->pipe_cpumask)) {
6473 			cpumask_setall(tr->pipe_cpumask);
6474 			return 0;
6475 		}
6476 	} else if (!cpumask_test_cpu(cpu, tr->pipe_cpumask)) {
6477 		cpumask_set_cpu(cpu, tr->pipe_cpumask);
6478 		return 0;
6479 	}
6480 	return -EBUSY;
6481 }
6482 
6483 static void close_pipe_on_cpu(struct trace_array *tr, int cpu)
6484 {
6485 	if (cpu == RING_BUFFER_ALL_CPUS) {
6486 		WARN_ON(!cpumask_full(tr->pipe_cpumask));
6487 		cpumask_clear(tr->pipe_cpumask);
6488 	} else {
6489 		WARN_ON(!cpumask_test_cpu(cpu, tr->pipe_cpumask));
6490 		cpumask_clear_cpu(cpu, tr->pipe_cpumask);
6491 	}
6492 }
6493 
6494 static int tracing_open_pipe(struct inode *inode, struct file *filp)
6495 {
6496 	struct trace_array *tr = inode->i_private;
6497 	struct trace_iterator *iter;
6498 	int cpu;
6499 	int ret;
6500 
6501 	ret = tracing_check_open_get_tr(tr);
6502 	if (ret)
6503 		return ret;
6504 
6505 	mutex_lock(&trace_types_lock);
6506 	cpu = tracing_get_cpu(inode);
6507 	ret = open_pipe_on_cpu(tr, cpu);
6508 	if (ret)
6509 		goto fail_pipe_on_cpu;
6510 
6511 	/* create a buffer to store the information to pass to userspace */
6512 	iter = kzalloc(sizeof(*iter), GFP_KERNEL);
6513 	if (!iter) {
6514 		ret = -ENOMEM;
6515 		goto fail_alloc_iter;
6516 	}
6517 
6518 	trace_seq_init(&iter->seq);
6519 	iter->trace = tr->current_trace;
6520 
6521 	if (!alloc_cpumask_var(&iter->started, GFP_KERNEL)) {
6522 		ret = -ENOMEM;
6523 		goto fail;
6524 	}
6525 
6526 	/* trace pipe does not show start of buffer */
6527 	cpumask_setall(iter->started);
6528 
6529 	if (tr->trace_flags & TRACE_ITER_LATENCY_FMT)
6530 		iter->iter_flags |= TRACE_FILE_LAT_FMT;
6531 
6532 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
6533 	if (trace_clocks[tr->clock_id].in_ns)
6534 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
6535 
6536 	iter->tr = tr;
6537 	iter->array_buffer = &tr->array_buffer;
6538 	iter->cpu_file = cpu;
6539 	mutex_init(&iter->mutex);
6540 	filp->private_data = iter;
6541 
6542 	if (iter->trace->pipe_open)
6543 		iter->trace->pipe_open(iter);
6544 
6545 	nonseekable_open(inode, filp);
6546 
6547 	tr->trace_ref++;
6548 
6549 	mutex_unlock(&trace_types_lock);
6550 	return ret;
6551 
6552 fail:
6553 	kfree(iter);
6554 fail_alloc_iter:
6555 	close_pipe_on_cpu(tr, cpu);
6556 fail_pipe_on_cpu:
6557 	__trace_array_put(tr);
6558 	mutex_unlock(&trace_types_lock);
6559 	return ret;
6560 }
6561 
6562 static int tracing_release_pipe(struct inode *inode, struct file *file)
6563 {
6564 	struct trace_iterator *iter = file->private_data;
6565 	struct trace_array *tr = inode->i_private;
6566 
6567 	mutex_lock(&trace_types_lock);
6568 
6569 	tr->trace_ref--;
6570 
6571 	if (iter->trace->pipe_close)
6572 		iter->trace->pipe_close(iter);
6573 	close_pipe_on_cpu(tr, iter->cpu_file);
6574 	mutex_unlock(&trace_types_lock);
6575 
6576 	free_trace_iter_content(iter);
6577 	kfree(iter);
6578 
6579 	trace_array_put(tr);
6580 
6581 	return 0;
6582 }
6583 
6584 static __poll_t
6585 trace_poll(struct trace_iterator *iter, struct file *filp, poll_table *poll_table)
6586 {
6587 	struct trace_array *tr = iter->tr;
6588 
6589 	/* Iterators are static, they should be filled or empty */
6590 	if (trace_buffer_iter(iter, iter->cpu_file))
6591 		return EPOLLIN | EPOLLRDNORM;
6592 
6593 	if (tr->trace_flags & TRACE_ITER_BLOCK)
6594 		/*
6595 		 * Always select as readable when in blocking mode
6596 		 */
6597 		return EPOLLIN | EPOLLRDNORM;
6598 	else
6599 		return ring_buffer_poll_wait(iter->array_buffer->buffer, iter->cpu_file,
6600 					     filp, poll_table, iter->tr->buffer_percent);
6601 }
6602 
6603 static __poll_t
6604 tracing_poll_pipe(struct file *filp, poll_table *poll_table)
6605 {
6606 	struct trace_iterator *iter = filp->private_data;
6607 
6608 	return trace_poll(iter, filp, poll_table);
6609 }
6610 
6611 /* Must be called with iter->mutex held. */
6612 static int tracing_wait_pipe(struct file *filp)
6613 {
6614 	struct trace_iterator *iter = filp->private_data;
6615 	int ret;
6616 
6617 	while (trace_empty(iter)) {
6618 
6619 		if ((filp->f_flags & O_NONBLOCK)) {
6620 			return -EAGAIN;
6621 		}
6622 
6623 		/*
6624 		 * We block until we read something and tracing is disabled.
6625 		 * We still block if tracing is disabled, but we have never
6626 		 * read anything. This allows a user to cat this file, and
6627 		 * then enable tracing. But after we have read something,
6628 		 * we give an EOF when tracing is again disabled.
6629 		 *
6630 		 * iter->pos will be 0 if we haven't read anything.
6631 		 */
6632 		if (!tracer_tracing_is_on(iter->tr) && iter->pos)
6633 			break;
6634 
6635 		mutex_unlock(&iter->mutex);
6636 
6637 		ret = wait_on_pipe(iter, 0);
6638 
6639 		mutex_lock(&iter->mutex);
6640 
6641 		if (ret)
6642 			return ret;
6643 	}
6644 
6645 	return 1;
6646 }
6647 
6648 /*
6649  * Consumer reader.
6650  */
6651 static ssize_t
6652 tracing_read_pipe(struct file *filp, char __user *ubuf,
6653 		  size_t cnt, loff_t *ppos)
6654 {
6655 	struct trace_iterator *iter = filp->private_data;
6656 	ssize_t sret;
6657 
6658 	/*
6659 	 * Avoid more than one consumer on a single file descriptor
6660 	 * This is just a matter of traces coherency, the ring buffer itself
6661 	 * is protected.
6662 	 */
6663 	mutex_lock(&iter->mutex);
6664 
6665 	/* return any leftover data */
6666 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6667 	if (sret != -EBUSY)
6668 		goto out;
6669 
6670 	trace_seq_init(&iter->seq);
6671 
6672 	if (iter->trace->read) {
6673 		sret = iter->trace->read(iter, filp, ubuf, cnt, ppos);
6674 		if (sret)
6675 			goto out;
6676 	}
6677 
6678 waitagain:
6679 	sret = tracing_wait_pipe(filp);
6680 	if (sret <= 0)
6681 		goto out;
6682 
6683 	/* stop when tracing is finished */
6684 	if (trace_empty(iter)) {
6685 		sret = 0;
6686 		goto out;
6687 	}
6688 
6689 	if (cnt >= TRACE_SEQ_BUFFER_SIZE)
6690 		cnt = TRACE_SEQ_BUFFER_SIZE - 1;
6691 
6692 	/* reset all but tr, trace, and overruns */
6693 	trace_iterator_reset(iter);
6694 	cpumask_clear(iter->started);
6695 	trace_seq_init(&iter->seq);
6696 
6697 	trace_event_read_lock();
6698 	trace_access_lock(iter->cpu_file);
6699 	while (trace_find_next_entry_inc(iter) != NULL) {
6700 		enum print_line_t ret;
6701 		int save_len = iter->seq.seq.len;
6702 
6703 		ret = print_trace_line(iter);
6704 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6705 			/*
6706 			 * If one print_trace_line() fills entire trace_seq in one shot,
6707 			 * trace_seq_to_user() will returns -EBUSY because save_len == 0,
6708 			 * In this case, we need to consume it, otherwise, loop will peek
6709 			 * this event next time, resulting in an infinite loop.
6710 			 */
6711 			if (save_len == 0) {
6712 				iter->seq.full = 0;
6713 				trace_seq_puts(&iter->seq, "[LINE TOO BIG]\n");
6714 				trace_consume(iter);
6715 				break;
6716 			}
6717 
6718 			/* In other cases, don't print partial lines */
6719 			iter->seq.seq.len = save_len;
6720 			break;
6721 		}
6722 		if (ret != TRACE_TYPE_NO_CONSUME)
6723 			trace_consume(iter);
6724 
6725 		if (trace_seq_used(&iter->seq) >= cnt)
6726 			break;
6727 
6728 		/*
6729 		 * Setting the full flag means we reached the trace_seq buffer
6730 		 * size and we should leave by partial output condition above.
6731 		 * One of the trace_seq_* functions is not used properly.
6732 		 */
6733 		WARN_ONCE(iter->seq.full, "full flag set for trace type %d",
6734 			  iter->ent->type);
6735 	}
6736 	trace_access_unlock(iter->cpu_file);
6737 	trace_event_read_unlock();
6738 
6739 	/* Now copy what we have to the user */
6740 	sret = trace_seq_to_user(&iter->seq, ubuf, cnt);
6741 	if (iter->seq.readpos >= trace_seq_used(&iter->seq))
6742 		trace_seq_init(&iter->seq);
6743 
6744 	/*
6745 	 * If there was nothing to send to user, in spite of consuming trace
6746 	 * entries, go back to wait for more entries.
6747 	 */
6748 	if (sret == -EBUSY)
6749 		goto waitagain;
6750 
6751 out:
6752 	mutex_unlock(&iter->mutex);
6753 
6754 	return sret;
6755 }
6756 
6757 static void tracing_spd_release_pipe(struct splice_pipe_desc *spd,
6758 				     unsigned int idx)
6759 {
6760 	__free_page(spd->pages[idx]);
6761 }
6762 
6763 static size_t
6764 tracing_fill_pipe_page(size_t rem, struct trace_iterator *iter)
6765 {
6766 	size_t count;
6767 	int save_len;
6768 	int ret;
6769 
6770 	/* Seq buffer is page-sized, exactly what we need. */
6771 	for (;;) {
6772 		save_len = iter->seq.seq.len;
6773 		ret = print_trace_line(iter);
6774 
6775 		if (trace_seq_has_overflowed(&iter->seq)) {
6776 			iter->seq.seq.len = save_len;
6777 			break;
6778 		}
6779 
6780 		/*
6781 		 * This should not be hit, because it should only
6782 		 * be set if the iter->seq overflowed. But check it
6783 		 * anyway to be safe.
6784 		 */
6785 		if (ret == TRACE_TYPE_PARTIAL_LINE) {
6786 			iter->seq.seq.len = save_len;
6787 			break;
6788 		}
6789 
6790 		count = trace_seq_used(&iter->seq) - save_len;
6791 		if (rem < count) {
6792 			rem = 0;
6793 			iter->seq.seq.len = save_len;
6794 			break;
6795 		}
6796 
6797 		if (ret != TRACE_TYPE_NO_CONSUME)
6798 			trace_consume(iter);
6799 		rem -= count;
6800 		if (!trace_find_next_entry_inc(iter))	{
6801 			rem = 0;
6802 			iter->ent = NULL;
6803 			break;
6804 		}
6805 	}
6806 
6807 	return rem;
6808 }
6809 
6810 static ssize_t tracing_splice_read_pipe(struct file *filp,
6811 					loff_t *ppos,
6812 					struct pipe_inode_info *pipe,
6813 					size_t len,
6814 					unsigned int flags)
6815 {
6816 	struct page *pages_def[PIPE_DEF_BUFFERS];
6817 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
6818 	struct trace_iterator *iter = filp->private_data;
6819 	struct splice_pipe_desc spd = {
6820 		.pages		= pages_def,
6821 		.partial	= partial_def,
6822 		.nr_pages	= 0, /* This gets updated below. */
6823 		.nr_pages_max	= PIPE_DEF_BUFFERS,
6824 		.ops		= &default_pipe_buf_ops,
6825 		.spd_release	= tracing_spd_release_pipe,
6826 	};
6827 	ssize_t ret;
6828 	size_t rem;
6829 	unsigned int i;
6830 
6831 	if (splice_grow_spd(pipe, &spd))
6832 		return -ENOMEM;
6833 
6834 	mutex_lock(&iter->mutex);
6835 
6836 	if (iter->trace->splice_read) {
6837 		ret = iter->trace->splice_read(iter, filp,
6838 					       ppos, pipe, len, flags);
6839 		if (ret)
6840 			goto out_err;
6841 	}
6842 
6843 	ret = tracing_wait_pipe(filp);
6844 	if (ret <= 0)
6845 		goto out_err;
6846 
6847 	if (!iter->ent && !trace_find_next_entry_inc(iter)) {
6848 		ret = -EFAULT;
6849 		goto out_err;
6850 	}
6851 
6852 	trace_event_read_lock();
6853 	trace_access_lock(iter->cpu_file);
6854 
6855 	/* Fill as many pages as possible. */
6856 	for (i = 0, rem = len; i < spd.nr_pages_max && rem; i++) {
6857 		spd.pages[i] = alloc_page(GFP_KERNEL);
6858 		if (!spd.pages[i])
6859 			break;
6860 
6861 		rem = tracing_fill_pipe_page(rem, iter);
6862 
6863 		/* Copy the data into the page, so we can start over. */
6864 		ret = trace_seq_to_buffer(&iter->seq,
6865 					  page_address(spd.pages[i]),
6866 					  trace_seq_used(&iter->seq));
6867 		if (ret < 0) {
6868 			__free_page(spd.pages[i]);
6869 			break;
6870 		}
6871 		spd.partial[i].offset = 0;
6872 		spd.partial[i].len = trace_seq_used(&iter->seq);
6873 
6874 		trace_seq_init(&iter->seq);
6875 	}
6876 
6877 	trace_access_unlock(iter->cpu_file);
6878 	trace_event_read_unlock();
6879 	mutex_unlock(&iter->mutex);
6880 
6881 	spd.nr_pages = i;
6882 
6883 	if (i)
6884 		ret = splice_to_pipe(pipe, &spd);
6885 	else
6886 		ret = 0;
6887 out:
6888 	splice_shrink_spd(&spd);
6889 	return ret;
6890 
6891 out_err:
6892 	mutex_unlock(&iter->mutex);
6893 	goto out;
6894 }
6895 
6896 static ssize_t
6897 tracing_entries_read(struct file *filp, char __user *ubuf,
6898 		     size_t cnt, loff_t *ppos)
6899 {
6900 	struct inode *inode = file_inode(filp);
6901 	struct trace_array *tr = inode->i_private;
6902 	int cpu = tracing_get_cpu(inode);
6903 	char buf[64];
6904 	int r = 0;
6905 	ssize_t ret;
6906 
6907 	mutex_lock(&trace_types_lock);
6908 
6909 	if (cpu == RING_BUFFER_ALL_CPUS) {
6910 		int cpu, buf_size_same;
6911 		unsigned long size;
6912 
6913 		size = 0;
6914 		buf_size_same = 1;
6915 		/* check if all cpu sizes are same */
6916 		for_each_tracing_cpu(cpu) {
6917 			/* fill in the size from first enabled cpu */
6918 			if (size == 0)
6919 				size = per_cpu_ptr(tr->array_buffer.data, cpu)->entries;
6920 			if (size != per_cpu_ptr(tr->array_buffer.data, cpu)->entries) {
6921 				buf_size_same = 0;
6922 				break;
6923 			}
6924 		}
6925 
6926 		if (buf_size_same) {
6927 			if (!tr->ring_buffer_expanded)
6928 				r = sprintf(buf, "%lu (expanded: %lu)\n",
6929 					    size >> 10,
6930 					    trace_buf_size >> 10);
6931 			else
6932 				r = sprintf(buf, "%lu\n", size >> 10);
6933 		} else
6934 			r = sprintf(buf, "X\n");
6935 	} else
6936 		r = sprintf(buf, "%lu\n", per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10);
6937 
6938 	mutex_unlock(&trace_types_lock);
6939 
6940 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6941 	return ret;
6942 }
6943 
6944 static ssize_t
6945 tracing_entries_write(struct file *filp, const char __user *ubuf,
6946 		      size_t cnt, loff_t *ppos)
6947 {
6948 	struct inode *inode = file_inode(filp);
6949 	struct trace_array *tr = inode->i_private;
6950 	unsigned long val;
6951 	int ret;
6952 
6953 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
6954 	if (ret)
6955 		return ret;
6956 
6957 	/* must have at least 1 entry */
6958 	if (!val)
6959 		return -EINVAL;
6960 
6961 	/* value is in KB */
6962 	val <<= 10;
6963 	ret = tracing_resize_ring_buffer(tr, val, tracing_get_cpu(inode));
6964 	if (ret < 0)
6965 		return ret;
6966 
6967 	*ppos += cnt;
6968 
6969 	return cnt;
6970 }
6971 
6972 static ssize_t
6973 tracing_total_entries_read(struct file *filp, char __user *ubuf,
6974 				size_t cnt, loff_t *ppos)
6975 {
6976 	struct trace_array *tr = filp->private_data;
6977 	char buf[64];
6978 	int r, cpu;
6979 	unsigned long size = 0, expanded_size = 0;
6980 
6981 	mutex_lock(&trace_types_lock);
6982 	for_each_tracing_cpu(cpu) {
6983 		size += per_cpu_ptr(tr->array_buffer.data, cpu)->entries >> 10;
6984 		if (!tr->ring_buffer_expanded)
6985 			expanded_size += trace_buf_size >> 10;
6986 	}
6987 	if (tr->ring_buffer_expanded)
6988 		r = sprintf(buf, "%lu\n", size);
6989 	else
6990 		r = sprintf(buf, "%lu (expanded: %lu)\n", size, expanded_size);
6991 	mutex_unlock(&trace_types_lock);
6992 
6993 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
6994 }
6995 
6996 static ssize_t
6997 tracing_last_boot_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
6998 {
6999 	struct trace_array *tr = filp->private_data;
7000 	struct seq_buf seq;
7001 	char buf[64];
7002 
7003 	seq_buf_init(&seq, buf, 64);
7004 
7005 	seq_buf_printf(&seq, "text delta:\t%ld\n", tr->text_delta);
7006 	seq_buf_printf(&seq, "data delta:\t%ld\n", tr->data_delta);
7007 
7008 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, seq_buf_used(&seq));
7009 }
7010 
7011 static int tracing_buffer_meta_open(struct inode *inode, struct file *filp)
7012 {
7013 	struct trace_array *tr = inode->i_private;
7014 	int cpu = tracing_get_cpu(inode);
7015 	int ret;
7016 
7017 	ret = tracing_check_open_get_tr(tr);
7018 	if (ret)
7019 		return ret;
7020 
7021 	ret = ring_buffer_meta_seq_init(filp, tr->array_buffer.buffer, cpu);
7022 	if (ret < 0)
7023 		__trace_array_put(tr);
7024 	return ret;
7025 }
7026 
7027 static ssize_t
7028 tracing_free_buffer_write(struct file *filp, const char __user *ubuf,
7029 			  size_t cnt, loff_t *ppos)
7030 {
7031 	/*
7032 	 * There is no need to read what the user has written, this function
7033 	 * is just to make sure that there is no error when "echo" is used
7034 	 */
7035 
7036 	*ppos += cnt;
7037 
7038 	return cnt;
7039 }
7040 
7041 static int
7042 tracing_free_buffer_release(struct inode *inode, struct file *filp)
7043 {
7044 	struct trace_array *tr = inode->i_private;
7045 
7046 	/* disable tracing ? */
7047 	if (tr->trace_flags & TRACE_ITER_STOP_ON_FREE)
7048 		tracer_tracing_off(tr);
7049 	/* resize the ring buffer to 0 */
7050 	tracing_resize_ring_buffer(tr, 0, RING_BUFFER_ALL_CPUS);
7051 
7052 	trace_array_put(tr);
7053 
7054 	return 0;
7055 }
7056 
7057 #define TRACE_MARKER_MAX_SIZE		4096
7058 
7059 static ssize_t
7060 tracing_mark_write(struct file *filp, const char __user *ubuf,
7061 					size_t cnt, loff_t *fpos)
7062 {
7063 	struct trace_array *tr = filp->private_data;
7064 	struct ring_buffer_event *event;
7065 	enum event_trigger_type tt = ETT_NONE;
7066 	struct trace_buffer *buffer;
7067 	struct print_entry *entry;
7068 	int meta_size;
7069 	ssize_t written;
7070 	size_t size;
7071 	int len;
7072 
7073 /* Used in tracing_mark_raw_write() as well */
7074 #define FAULTED_STR "<faulted>"
7075 #define FAULTED_SIZE (sizeof(FAULTED_STR) - 1) /* '\0' is already accounted for */
7076 
7077 	if (tracing_disabled)
7078 		return -EINVAL;
7079 
7080 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7081 		return -EINVAL;
7082 
7083 	if ((ssize_t)cnt < 0)
7084 		return -EINVAL;
7085 
7086 	if (cnt > TRACE_MARKER_MAX_SIZE)
7087 		cnt = TRACE_MARKER_MAX_SIZE;
7088 
7089 	meta_size = sizeof(*entry) + 2;  /* add '\0' and possible '\n' */
7090  again:
7091 	size = cnt + meta_size;
7092 
7093 	/* If less than "<faulted>", then make sure we can still add that */
7094 	if (cnt < FAULTED_SIZE)
7095 		size += FAULTED_SIZE - cnt;
7096 
7097 	buffer = tr->array_buffer.buffer;
7098 	event = __trace_buffer_lock_reserve(buffer, TRACE_PRINT, size,
7099 					    tracing_gen_ctx());
7100 	if (unlikely(!event)) {
7101 		/*
7102 		 * If the size was greater than what was allowed, then
7103 		 * make it smaller and try again.
7104 		 */
7105 		if (size > ring_buffer_max_event_size(buffer)) {
7106 			/* cnt < FAULTED size should never be bigger than max */
7107 			if (WARN_ON_ONCE(cnt < FAULTED_SIZE))
7108 				return -EBADF;
7109 			cnt = ring_buffer_max_event_size(buffer) - meta_size;
7110 			/* The above should only happen once */
7111 			if (WARN_ON_ONCE(cnt + meta_size == size))
7112 				return -EBADF;
7113 			goto again;
7114 		}
7115 
7116 		/* Ring buffer disabled, return as if not open for write */
7117 		return -EBADF;
7118 	}
7119 
7120 	entry = ring_buffer_event_data(event);
7121 	entry->ip = _THIS_IP_;
7122 
7123 	len = __copy_from_user_inatomic(&entry->buf, ubuf, cnt);
7124 	if (len) {
7125 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7126 		cnt = FAULTED_SIZE;
7127 		written = -EFAULT;
7128 	} else
7129 		written = cnt;
7130 
7131 	if (tr->trace_marker_file && !list_empty(&tr->trace_marker_file->triggers)) {
7132 		/* do not add \n before testing triggers, but add \0 */
7133 		entry->buf[cnt] = '\0';
7134 		tt = event_triggers_call(tr->trace_marker_file, buffer, entry, event);
7135 	}
7136 
7137 	if (entry->buf[cnt - 1] != '\n') {
7138 		entry->buf[cnt] = '\n';
7139 		entry->buf[cnt + 1] = '\0';
7140 	} else
7141 		entry->buf[cnt] = '\0';
7142 
7143 	if (static_branch_unlikely(&trace_marker_exports_enabled))
7144 		ftrace_exports(event, TRACE_EXPORT_MARKER);
7145 	__buffer_unlock_commit(buffer, event);
7146 
7147 	if (tt)
7148 		event_triggers_post_call(tr->trace_marker_file, tt);
7149 
7150 	return written;
7151 }
7152 
7153 static ssize_t
7154 tracing_mark_raw_write(struct file *filp, const char __user *ubuf,
7155 					size_t cnt, loff_t *fpos)
7156 {
7157 	struct trace_array *tr = filp->private_data;
7158 	struct ring_buffer_event *event;
7159 	struct trace_buffer *buffer;
7160 	struct raw_data_entry *entry;
7161 	ssize_t written;
7162 	int size;
7163 	int len;
7164 
7165 #define FAULT_SIZE_ID (FAULTED_SIZE + sizeof(int))
7166 
7167 	if (tracing_disabled)
7168 		return -EINVAL;
7169 
7170 	if (!(tr->trace_flags & TRACE_ITER_MARKERS))
7171 		return -EINVAL;
7172 
7173 	/* The marker must at least have a tag id */
7174 	if (cnt < sizeof(unsigned int))
7175 		return -EINVAL;
7176 
7177 	size = sizeof(*entry) + cnt;
7178 	if (cnt < FAULT_SIZE_ID)
7179 		size += FAULT_SIZE_ID - cnt;
7180 
7181 	buffer = tr->array_buffer.buffer;
7182 
7183 	if (size > ring_buffer_max_event_size(buffer))
7184 		return -EINVAL;
7185 
7186 	event = __trace_buffer_lock_reserve(buffer, TRACE_RAW_DATA, size,
7187 					    tracing_gen_ctx());
7188 	if (!event)
7189 		/* Ring buffer disabled, return as if not open for write */
7190 		return -EBADF;
7191 
7192 	entry = ring_buffer_event_data(event);
7193 
7194 	len = __copy_from_user_inatomic(&entry->id, ubuf, cnt);
7195 	if (len) {
7196 		entry->id = -1;
7197 		memcpy(&entry->buf, FAULTED_STR, FAULTED_SIZE);
7198 		written = -EFAULT;
7199 	} else
7200 		written = cnt;
7201 
7202 	__buffer_unlock_commit(buffer, event);
7203 
7204 	return written;
7205 }
7206 
7207 static int tracing_clock_show(struct seq_file *m, void *v)
7208 {
7209 	struct trace_array *tr = m->private;
7210 	int i;
7211 
7212 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++)
7213 		seq_printf(m,
7214 			"%s%s%s%s", i ? " " : "",
7215 			i == tr->clock_id ? "[" : "", trace_clocks[i].name,
7216 			i == tr->clock_id ? "]" : "");
7217 	seq_putc(m, '\n');
7218 
7219 	return 0;
7220 }
7221 
7222 int tracing_set_clock(struct trace_array *tr, const char *clockstr)
7223 {
7224 	int i;
7225 
7226 	for (i = 0; i < ARRAY_SIZE(trace_clocks); i++) {
7227 		if (strcmp(trace_clocks[i].name, clockstr) == 0)
7228 			break;
7229 	}
7230 	if (i == ARRAY_SIZE(trace_clocks))
7231 		return -EINVAL;
7232 
7233 	mutex_lock(&trace_types_lock);
7234 
7235 	tr->clock_id = i;
7236 
7237 	ring_buffer_set_clock(tr->array_buffer.buffer, trace_clocks[i].func);
7238 
7239 	/*
7240 	 * New clock may not be consistent with the previous clock.
7241 	 * Reset the buffer so that it doesn't have incomparable timestamps.
7242 	 */
7243 	tracing_reset_online_cpus(&tr->array_buffer);
7244 
7245 #ifdef CONFIG_TRACER_MAX_TRACE
7246 	if (tr->max_buffer.buffer)
7247 		ring_buffer_set_clock(tr->max_buffer.buffer, trace_clocks[i].func);
7248 	tracing_reset_online_cpus(&tr->max_buffer);
7249 #endif
7250 
7251 	mutex_unlock(&trace_types_lock);
7252 
7253 	return 0;
7254 }
7255 
7256 static ssize_t tracing_clock_write(struct file *filp, const char __user *ubuf,
7257 				   size_t cnt, loff_t *fpos)
7258 {
7259 	struct seq_file *m = filp->private_data;
7260 	struct trace_array *tr = m->private;
7261 	char buf[64];
7262 	const char *clockstr;
7263 	int ret;
7264 
7265 	if (cnt >= sizeof(buf))
7266 		return -EINVAL;
7267 
7268 	if (copy_from_user(buf, ubuf, cnt))
7269 		return -EFAULT;
7270 
7271 	buf[cnt] = 0;
7272 
7273 	clockstr = strstrip(buf);
7274 
7275 	ret = tracing_set_clock(tr, clockstr);
7276 	if (ret)
7277 		return ret;
7278 
7279 	*fpos += cnt;
7280 
7281 	return cnt;
7282 }
7283 
7284 static int tracing_clock_open(struct inode *inode, struct file *file)
7285 {
7286 	struct trace_array *tr = inode->i_private;
7287 	int ret;
7288 
7289 	ret = tracing_check_open_get_tr(tr);
7290 	if (ret)
7291 		return ret;
7292 
7293 	ret = single_open(file, tracing_clock_show, inode->i_private);
7294 	if (ret < 0)
7295 		trace_array_put(tr);
7296 
7297 	return ret;
7298 }
7299 
7300 static int tracing_time_stamp_mode_show(struct seq_file *m, void *v)
7301 {
7302 	struct trace_array *tr = m->private;
7303 
7304 	mutex_lock(&trace_types_lock);
7305 
7306 	if (ring_buffer_time_stamp_abs(tr->array_buffer.buffer))
7307 		seq_puts(m, "delta [absolute]\n");
7308 	else
7309 		seq_puts(m, "[delta] absolute\n");
7310 
7311 	mutex_unlock(&trace_types_lock);
7312 
7313 	return 0;
7314 }
7315 
7316 static int tracing_time_stamp_mode_open(struct inode *inode, struct file *file)
7317 {
7318 	struct trace_array *tr = inode->i_private;
7319 	int ret;
7320 
7321 	ret = tracing_check_open_get_tr(tr);
7322 	if (ret)
7323 		return ret;
7324 
7325 	ret = single_open(file, tracing_time_stamp_mode_show, inode->i_private);
7326 	if (ret < 0)
7327 		trace_array_put(tr);
7328 
7329 	return ret;
7330 }
7331 
7332 u64 tracing_event_time_stamp(struct trace_buffer *buffer, struct ring_buffer_event *rbe)
7333 {
7334 	if (rbe == this_cpu_read(trace_buffered_event))
7335 		return ring_buffer_time_stamp(buffer);
7336 
7337 	return ring_buffer_event_time_stamp(buffer, rbe);
7338 }
7339 
7340 /*
7341  * Set or disable using the per CPU trace_buffer_event when possible.
7342  */
7343 int tracing_set_filter_buffering(struct trace_array *tr, bool set)
7344 {
7345 	int ret = 0;
7346 
7347 	mutex_lock(&trace_types_lock);
7348 
7349 	if (set && tr->no_filter_buffering_ref++)
7350 		goto out;
7351 
7352 	if (!set) {
7353 		if (WARN_ON_ONCE(!tr->no_filter_buffering_ref)) {
7354 			ret = -EINVAL;
7355 			goto out;
7356 		}
7357 
7358 		--tr->no_filter_buffering_ref;
7359 	}
7360  out:
7361 	mutex_unlock(&trace_types_lock);
7362 
7363 	return ret;
7364 }
7365 
7366 struct ftrace_buffer_info {
7367 	struct trace_iterator	iter;
7368 	void			*spare;
7369 	unsigned int		spare_cpu;
7370 	unsigned int		spare_size;
7371 	unsigned int		read;
7372 };
7373 
7374 #ifdef CONFIG_TRACER_SNAPSHOT
7375 static int tracing_snapshot_open(struct inode *inode, struct file *file)
7376 {
7377 	struct trace_array *tr = inode->i_private;
7378 	struct trace_iterator *iter;
7379 	struct seq_file *m;
7380 	int ret;
7381 
7382 	ret = tracing_check_open_get_tr(tr);
7383 	if (ret)
7384 		return ret;
7385 
7386 	if (file->f_mode & FMODE_READ) {
7387 		iter = __tracing_open(inode, file, true);
7388 		if (IS_ERR(iter))
7389 			ret = PTR_ERR(iter);
7390 	} else {
7391 		/* Writes still need the seq_file to hold the private data */
7392 		ret = -ENOMEM;
7393 		m = kzalloc(sizeof(*m), GFP_KERNEL);
7394 		if (!m)
7395 			goto out;
7396 		iter = kzalloc(sizeof(*iter), GFP_KERNEL);
7397 		if (!iter) {
7398 			kfree(m);
7399 			goto out;
7400 		}
7401 		ret = 0;
7402 
7403 		iter->tr = tr;
7404 		iter->array_buffer = &tr->max_buffer;
7405 		iter->cpu_file = tracing_get_cpu(inode);
7406 		m->private = iter;
7407 		file->private_data = m;
7408 	}
7409 out:
7410 	if (ret < 0)
7411 		trace_array_put(tr);
7412 
7413 	return ret;
7414 }
7415 
7416 static void tracing_swap_cpu_buffer(void *tr)
7417 {
7418 	update_max_tr_single((struct trace_array *)tr, current, smp_processor_id());
7419 }
7420 
7421 static ssize_t
7422 tracing_snapshot_write(struct file *filp, const char __user *ubuf, size_t cnt,
7423 		       loff_t *ppos)
7424 {
7425 	struct seq_file *m = filp->private_data;
7426 	struct trace_iterator *iter = m->private;
7427 	struct trace_array *tr = iter->tr;
7428 	unsigned long val;
7429 	int ret;
7430 
7431 	ret = tracing_update_buffers(tr);
7432 	if (ret < 0)
7433 		return ret;
7434 
7435 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
7436 	if (ret)
7437 		return ret;
7438 
7439 	mutex_lock(&trace_types_lock);
7440 
7441 	if (tr->current_trace->use_max_tr) {
7442 		ret = -EBUSY;
7443 		goto out;
7444 	}
7445 
7446 	local_irq_disable();
7447 	arch_spin_lock(&tr->max_lock);
7448 	if (tr->cond_snapshot)
7449 		ret = -EBUSY;
7450 	arch_spin_unlock(&tr->max_lock);
7451 	local_irq_enable();
7452 	if (ret)
7453 		goto out;
7454 
7455 	switch (val) {
7456 	case 0:
7457 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7458 			ret = -EINVAL;
7459 			break;
7460 		}
7461 		if (tr->allocated_snapshot)
7462 			free_snapshot(tr);
7463 		break;
7464 	case 1:
7465 /* Only allow per-cpu swap if the ring buffer supports it */
7466 #ifndef CONFIG_RING_BUFFER_ALLOW_SWAP
7467 		if (iter->cpu_file != RING_BUFFER_ALL_CPUS) {
7468 			ret = -EINVAL;
7469 			break;
7470 		}
7471 #endif
7472 		if (tr->allocated_snapshot)
7473 			ret = resize_buffer_duplicate_size(&tr->max_buffer,
7474 					&tr->array_buffer, iter->cpu_file);
7475 
7476 		ret = tracing_arm_snapshot_locked(tr);
7477 		if (ret)
7478 			break;
7479 
7480 		/* Now, we're going to swap */
7481 		if (iter->cpu_file == RING_BUFFER_ALL_CPUS) {
7482 			local_irq_disable();
7483 			update_max_tr(tr, current, smp_processor_id(), NULL);
7484 			local_irq_enable();
7485 		} else {
7486 			smp_call_function_single(iter->cpu_file, tracing_swap_cpu_buffer,
7487 						 (void *)tr, 1);
7488 		}
7489 		tracing_disarm_snapshot(tr);
7490 		break;
7491 	default:
7492 		if (tr->allocated_snapshot) {
7493 			if (iter->cpu_file == RING_BUFFER_ALL_CPUS)
7494 				tracing_reset_online_cpus(&tr->max_buffer);
7495 			else
7496 				tracing_reset_cpu(&tr->max_buffer, iter->cpu_file);
7497 		}
7498 		break;
7499 	}
7500 
7501 	if (ret >= 0) {
7502 		*ppos += cnt;
7503 		ret = cnt;
7504 	}
7505 out:
7506 	mutex_unlock(&trace_types_lock);
7507 	return ret;
7508 }
7509 
7510 static int tracing_snapshot_release(struct inode *inode, struct file *file)
7511 {
7512 	struct seq_file *m = file->private_data;
7513 	int ret;
7514 
7515 	ret = tracing_release(inode, file);
7516 
7517 	if (file->f_mode & FMODE_READ)
7518 		return ret;
7519 
7520 	/* If write only, the seq_file is just a stub */
7521 	if (m)
7522 		kfree(m->private);
7523 	kfree(m);
7524 
7525 	return 0;
7526 }
7527 
7528 static int tracing_buffers_open(struct inode *inode, struct file *filp);
7529 static ssize_t tracing_buffers_read(struct file *filp, char __user *ubuf,
7530 				    size_t count, loff_t *ppos);
7531 static int tracing_buffers_release(struct inode *inode, struct file *file);
7532 static ssize_t tracing_buffers_splice_read(struct file *file, loff_t *ppos,
7533 		   struct pipe_inode_info *pipe, size_t len, unsigned int flags);
7534 
7535 static int snapshot_raw_open(struct inode *inode, struct file *filp)
7536 {
7537 	struct ftrace_buffer_info *info;
7538 	int ret;
7539 
7540 	/* The following checks for tracefs lockdown */
7541 	ret = tracing_buffers_open(inode, filp);
7542 	if (ret < 0)
7543 		return ret;
7544 
7545 	info = filp->private_data;
7546 
7547 	if (info->iter.trace->use_max_tr) {
7548 		tracing_buffers_release(inode, filp);
7549 		return -EBUSY;
7550 	}
7551 
7552 	info->iter.snapshot = true;
7553 	info->iter.array_buffer = &info->iter.tr->max_buffer;
7554 
7555 	return ret;
7556 }
7557 
7558 #endif /* CONFIG_TRACER_SNAPSHOT */
7559 
7560 
7561 static const struct file_operations tracing_thresh_fops = {
7562 	.open		= tracing_open_generic,
7563 	.read		= tracing_thresh_read,
7564 	.write		= tracing_thresh_write,
7565 	.llseek		= generic_file_llseek,
7566 };
7567 
7568 #ifdef CONFIG_TRACER_MAX_TRACE
7569 static const struct file_operations tracing_max_lat_fops = {
7570 	.open		= tracing_open_generic_tr,
7571 	.read		= tracing_max_lat_read,
7572 	.write		= tracing_max_lat_write,
7573 	.llseek		= generic_file_llseek,
7574 	.release	= tracing_release_generic_tr,
7575 };
7576 #endif
7577 
7578 static const struct file_operations set_tracer_fops = {
7579 	.open		= tracing_open_generic_tr,
7580 	.read		= tracing_set_trace_read,
7581 	.write		= tracing_set_trace_write,
7582 	.llseek		= generic_file_llseek,
7583 	.release	= tracing_release_generic_tr,
7584 };
7585 
7586 static const struct file_operations tracing_pipe_fops = {
7587 	.open		= tracing_open_pipe,
7588 	.poll		= tracing_poll_pipe,
7589 	.read		= tracing_read_pipe,
7590 	.splice_read	= tracing_splice_read_pipe,
7591 	.release	= tracing_release_pipe,
7592 };
7593 
7594 static const struct file_operations tracing_entries_fops = {
7595 	.open		= tracing_open_generic_tr,
7596 	.read		= tracing_entries_read,
7597 	.write		= tracing_entries_write,
7598 	.llseek		= generic_file_llseek,
7599 	.release	= tracing_release_generic_tr,
7600 };
7601 
7602 static const struct file_operations tracing_buffer_meta_fops = {
7603 	.open		= tracing_buffer_meta_open,
7604 	.read		= seq_read,
7605 	.llseek		= seq_lseek,
7606 	.release	= tracing_seq_release,
7607 };
7608 
7609 static const struct file_operations tracing_total_entries_fops = {
7610 	.open		= tracing_open_generic_tr,
7611 	.read		= tracing_total_entries_read,
7612 	.llseek		= generic_file_llseek,
7613 	.release	= tracing_release_generic_tr,
7614 };
7615 
7616 static const struct file_operations tracing_free_buffer_fops = {
7617 	.open		= tracing_open_generic_tr,
7618 	.write		= tracing_free_buffer_write,
7619 	.release	= tracing_free_buffer_release,
7620 };
7621 
7622 static const struct file_operations tracing_mark_fops = {
7623 	.open		= tracing_mark_open,
7624 	.write		= tracing_mark_write,
7625 	.release	= tracing_release_generic_tr,
7626 };
7627 
7628 static const struct file_operations tracing_mark_raw_fops = {
7629 	.open		= tracing_mark_open,
7630 	.write		= tracing_mark_raw_write,
7631 	.release	= tracing_release_generic_tr,
7632 };
7633 
7634 static const struct file_operations trace_clock_fops = {
7635 	.open		= tracing_clock_open,
7636 	.read		= seq_read,
7637 	.llseek		= seq_lseek,
7638 	.release	= tracing_single_release_tr,
7639 	.write		= tracing_clock_write,
7640 };
7641 
7642 static const struct file_operations trace_time_stamp_mode_fops = {
7643 	.open		= tracing_time_stamp_mode_open,
7644 	.read		= seq_read,
7645 	.llseek		= seq_lseek,
7646 	.release	= tracing_single_release_tr,
7647 };
7648 
7649 static const struct file_operations last_boot_fops = {
7650 	.open		= tracing_open_generic_tr,
7651 	.read		= tracing_last_boot_read,
7652 	.llseek		= generic_file_llseek,
7653 	.release	= tracing_release_generic_tr,
7654 };
7655 
7656 #ifdef CONFIG_TRACER_SNAPSHOT
7657 static const struct file_operations snapshot_fops = {
7658 	.open		= tracing_snapshot_open,
7659 	.read		= seq_read,
7660 	.write		= tracing_snapshot_write,
7661 	.llseek		= tracing_lseek,
7662 	.release	= tracing_snapshot_release,
7663 };
7664 
7665 static const struct file_operations snapshot_raw_fops = {
7666 	.open		= snapshot_raw_open,
7667 	.read		= tracing_buffers_read,
7668 	.release	= tracing_buffers_release,
7669 	.splice_read	= tracing_buffers_splice_read,
7670 };
7671 
7672 #endif /* CONFIG_TRACER_SNAPSHOT */
7673 
7674 /*
7675  * trace_min_max_write - Write a u64 value to a trace_min_max_param struct
7676  * @filp: The active open file structure
7677  * @ubuf: The userspace provided buffer to read value into
7678  * @cnt: The maximum number of bytes to read
7679  * @ppos: The current "file" position
7680  *
7681  * This function implements the write interface for a struct trace_min_max_param.
7682  * The filp->private_data must point to a trace_min_max_param structure that
7683  * defines where to write the value, the min and the max acceptable values,
7684  * and a lock to protect the write.
7685  */
7686 static ssize_t
7687 trace_min_max_write(struct file *filp, const char __user *ubuf, size_t cnt, loff_t *ppos)
7688 {
7689 	struct trace_min_max_param *param = filp->private_data;
7690 	u64 val;
7691 	int err;
7692 
7693 	if (!param)
7694 		return -EFAULT;
7695 
7696 	err = kstrtoull_from_user(ubuf, cnt, 10, &val);
7697 	if (err)
7698 		return err;
7699 
7700 	if (param->lock)
7701 		mutex_lock(param->lock);
7702 
7703 	if (param->min && val < *param->min)
7704 		err = -EINVAL;
7705 
7706 	if (param->max && val > *param->max)
7707 		err = -EINVAL;
7708 
7709 	if (!err)
7710 		*param->val = val;
7711 
7712 	if (param->lock)
7713 		mutex_unlock(param->lock);
7714 
7715 	if (err)
7716 		return err;
7717 
7718 	return cnt;
7719 }
7720 
7721 /*
7722  * trace_min_max_read - Read a u64 value from a trace_min_max_param struct
7723  * @filp: The active open file structure
7724  * @ubuf: The userspace provided buffer to read value into
7725  * @cnt: The maximum number of bytes to read
7726  * @ppos: The current "file" position
7727  *
7728  * This function implements the read interface for a struct trace_min_max_param.
7729  * The filp->private_data must point to a trace_min_max_param struct with valid
7730  * data.
7731  */
7732 static ssize_t
7733 trace_min_max_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
7734 {
7735 	struct trace_min_max_param *param = filp->private_data;
7736 	char buf[U64_STR_SIZE];
7737 	int len;
7738 	u64 val;
7739 
7740 	if (!param)
7741 		return -EFAULT;
7742 
7743 	val = *param->val;
7744 
7745 	if (cnt > sizeof(buf))
7746 		cnt = sizeof(buf);
7747 
7748 	len = snprintf(buf, sizeof(buf), "%llu\n", val);
7749 
7750 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
7751 }
7752 
7753 const struct file_operations trace_min_max_fops = {
7754 	.open		= tracing_open_generic,
7755 	.read		= trace_min_max_read,
7756 	.write		= trace_min_max_write,
7757 };
7758 
7759 #define TRACING_LOG_ERRS_MAX	8
7760 #define TRACING_LOG_LOC_MAX	128
7761 
7762 #define CMD_PREFIX "  Command: "
7763 
7764 struct err_info {
7765 	const char	**errs;	/* ptr to loc-specific array of err strings */
7766 	u8		type;	/* index into errs -> specific err string */
7767 	u16		pos;	/* caret position */
7768 	u64		ts;
7769 };
7770 
7771 struct tracing_log_err {
7772 	struct list_head	list;
7773 	struct err_info		info;
7774 	char			loc[TRACING_LOG_LOC_MAX]; /* err location */
7775 	char			*cmd;                     /* what caused err */
7776 };
7777 
7778 static DEFINE_MUTEX(tracing_err_log_lock);
7779 
7780 static struct tracing_log_err *alloc_tracing_log_err(int len)
7781 {
7782 	struct tracing_log_err *err;
7783 
7784 	err = kzalloc(sizeof(*err), GFP_KERNEL);
7785 	if (!err)
7786 		return ERR_PTR(-ENOMEM);
7787 
7788 	err->cmd = kzalloc(len, GFP_KERNEL);
7789 	if (!err->cmd) {
7790 		kfree(err);
7791 		return ERR_PTR(-ENOMEM);
7792 	}
7793 
7794 	return err;
7795 }
7796 
7797 static void free_tracing_log_err(struct tracing_log_err *err)
7798 {
7799 	kfree(err->cmd);
7800 	kfree(err);
7801 }
7802 
7803 static struct tracing_log_err *get_tracing_log_err(struct trace_array *tr,
7804 						   int len)
7805 {
7806 	struct tracing_log_err *err;
7807 	char *cmd;
7808 
7809 	if (tr->n_err_log_entries < TRACING_LOG_ERRS_MAX) {
7810 		err = alloc_tracing_log_err(len);
7811 		if (PTR_ERR(err) != -ENOMEM)
7812 			tr->n_err_log_entries++;
7813 
7814 		return err;
7815 	}
7816 	cmd = kzalloc(len, GFP_KERNEL);
7817 	if (!cmd)
7818 		return ERR_PTR(-ENOMEM);
7819 	err = list_first_entry(&tr->err_log, struct tracing_log_err, list);
7820 	kfree(err->cmd);
7821 	err->cmd = cmd;
7822 	list_del(&err->list);
7823 
7824 	return err;
7825 }
7826 
7827 /**
7828  * err_pos - find the position of a string within a command for error careting
7829  * @cmd: The tracing command that caused the error
7830  * @str: The string to position the caret at within @cmd
7831  *
7832  * Finds the position of the first occurrence of @str within @cmd.  The
7833  * return value can be passed to tracing_log_err() for caret placement
7834  * within @cmd.
7835  *
7836  * Returns the index within @cmd of the first occurrence of @str or 0
7837  * if @str was not found.
7838  */
7839 unsigned int err_pos(char *cmd, const char *str)
7840 {
7841 	char *found;
7842 
7843 	if (WARN_ON(!strlen(cmd)))
7844 		return 0;
7845 
7846 	found = strstr(cmd, str);
7847 	if (found)
7848 		return found - cmd;
7849 
7850 	return 0;
7851 }
7852 
7853 /**
7854  * tracing_log_err - write an error to the tracing error log
7855  * @tr: The associated trace array for the error (NULL for top level array)
7856  * @loc: A string describing where the error occurred
7857  * @cmd: The tracing command that caused the error
7858  * @errs: The array of loc-specific static error strings
7859  * @type: The index into errs[], which produces the specific static err string
7860  * @pos: The position the caret should be placed in the cmd
7861  *
7862  * Writes an error into tracing/error_log of the form:
7863  *
7864  * <loc>: error: <text>
7865  *   Command: <cmd>
7866  *              ^
7867  *
7868  * tracing/error_log is a small log file containing the last
7869  * TRACING_LOG_ERRS_MAX errors (8).  Memory for errors isn't allocated
7870  * unless there has been a tracing error, and the error log can be
7871  * cleared and have its memory freed by writing the empty string in
7872  * truncation mode to it i.e. echo > tracing/error_log.
7873  *
7874  * NOTE: the @errs array along with the @type param are used to
7875  * produce a static error string - this string is not copied and saved
7876  * when the error is logged - only a pointer to it is saved.  See
7877  * existing callers for examples of how static strings are typically
7878  * defined for use with tracing_log_err().
7879  */
7880 void tracing_log_err(struct trace_array *tr,
7881 		     const char *loc, const char *cmd,
7882 		     const char **errs, u8 type, u16 pos)
7883 {
7884 	struct tracing_log_err *err;
7885 	int len = 0;
7886 
7887 	if (!tr)
7888 		tr = &global_trace;
7889 
7890 	len += sizeof(CMD_PREFIX) + 2 * sizeof("\n") + strlen(cmd) + 1;
7891 
7892 	mutex_lock(&tracing_err_log_lock);
7893 	err = get_tracing_log_err(tr, len);
7894 	if (PTR_ERR(err) == -ENOMEM) {
7895 		mutex_unlock(&tracing_err_log_lock);
7896 		return;
7897 	}
7898 
7899 	snprintf(err->loc, TRACING_LOG_LOC_MAX, "%s: error: ", loc);
7900 	snprintf(err->cmd, len, "\n" CMD_PREFIX "%s\n", cmd);
7901 
7902 	err->info.errs = errs;
7903 	err->info.type = type;
7904 	err->info.pos = pos;
7905 	err->info.ts = local_clock();
7906 
7907 	list_add_tail(&err->list, &tr->err_log);
7908 	mutex_unlock(&tracing_err_log_lock);
7909 }
7910 
7911 static void clear_tracing_err_log(struct trace_array *tr)
7912 {
7913 	struct tracing_log_err *err, *next;
7914 
7915 	mutex_lock(&tracing_err_log_lock);
7916 	list_for_each_entry_safe(err, next, &tr->err_log, list) {
7917 		list_del(&err->list);
7918 		free_tracing_log_err(err);
7919 	}
7920 
7921 	tr->n_err_log_entries = 0;
7922 	mutex_unlock(&tracing_err_log_lock);
7923 }
7924 
7925 static void *tracing_err_log_seq_start(struct seq_file *m, loff_t *pos)
7926 {
7927 	struct trace_array *tr = m->private;
7928 
7929 	mutex_lock(&tracing_err_log_lock);
7930 
7931 	return seq_list_start(&tr->err_log, *pos);
7932 }
7933 
7934 static void *tracing_err_log_seq_next(struct seq_file *m, void *v, loff_t *pos)
7935 {
7936 	struct trace_array *tr = m->private;
7937 
7938 	return seq_list_next(v, &tr->err_log, pos);
7939 }
7940 
7941 static void tracing_err_log_seq_stop(struct seq_file *m, void *v)
7942 {
7943 	mutex_unlock(&tracing_err_log_lock);
7944 }
7945 
7946 static void tracing_err_log_show_pos(struct seq_file *m, u16 pos)
7947 {
7948 	u16 i;
7949 
7950 	for (i = 0; i < sizeof(CMD_PREFIX) - 1; i++)
7951 		seq_putc(m, ' ');
7952 	for (i = 0; i < pos; i++)
7953 		seq_putc(m, ' ');
7954 	seq_puts(m, "^\n");
7955 }
7956 
7957 static int tracing_err_log_seq_show(struct seq_file *m, void *v)
7958 {
7959 	struct tracing_log_err *err = v;
7960 
7961 	if (err) {
7962 		const char *err_text = err->info.errs[err->info.type];
7963 		u64 sec = err->info.ts;
7964 		u32 nsec;
7965 
7966 		nsec = do_div(sec, NSEC_PER_SEC);
7967 		seq_printf(m, "[%5llu.%06u] %s%s", sec, nsec / 1000,
7968 			   err->loc, err_text);
7969 		seq_printf(m, "%s", err->cmd);
7970 		tracing_err_log_show_pos(m, err->info.pos);
7971 	}
7972 
7973 	return 0;
7974 }
7975 
7976 static const struct seq_operations tracing_err_log_seq_ops = {
7977 	.start  = tracing_err_log_seq_start,
7978 	.next   = tracing_err_log_seq_next,
7979 	.stop   = tracing_err_log_seq_stop,
7980 	.show   = tracing_err_log_seq_show
7981 };
7982 
7983 static int tracing_err_log_open(struct inode *inode, struct file *file)
7984 {
7985 	struct trace_array *tr = inode->i_private;
7986 	int ret = 0;
7987 
7988 	ret = tracing_check_open_get_tr(tr);
7989 	if (ret)
7990 		return ret;
7991 
7992 	/* If this file was opened for write, then erase contents */
7993 	if ((file->f_mode & FMODE_WRITE) && (file->f_flags & O_TRUNC))
7994 		clear_tracing_err_log(tr);
7995 
7996 	if (file->f_mode & FMODE_READ) {
7997 		ret = seq_open(file, &tracing_err_log_seq_ops);
7998 		if (!ret) {
7999 			struct seq_file *m = file->private_data;
8000 			m->private = tr;
8001 		} else {
8002 			trace_array_put(tr);
8003 		}
8004 	}
8005 	return ret;
8006 }
8007 
8008 static ssize_t tracing_err_log_write(struct file *file,
8009 				     const char __user *buffer,
8010 				     size_t count, loff_t *ppos)
8011 {
8012 	return count;
8013 }
8014 
8015 static int tracing_err_log_release(struct inode *inode, struct file *file)
8016 {
8017 	struct trace_array *tr = inode->i_private;
8018 
8019 	trace_array_put(tr);
8020 
8021 	if (file->f_mode & FMODE_READ)
8022 		seq_release(inode, file);
8023 
8024 	return 0;
8025 }
8026 
8027 static const struct file_operations tracing_err_log_fops = {
8028 	.open           = tracing_err_log_open,
8029 	.write		= tracing_err_log_write,
8030 	.read           = seq_read,
8031 	.llseek         = tracing_lseek,
8032 	.release        = tracing_err_log_release,
8033 };
8034 
8035 static int tracing_buffers_open(struct inode *inode, struct file *filp)
8036 {
8037 	struct trace_array *tr = inode->i_private;
8038 	struct ftrace_buffer_info *info;
8039 	int ret;
8040 
8041 	ret = tracing_check_open_get_tr(tr);
8042 	if (ret)
8043 		return ret;
8044 
8045 	info = kvzalloc(sizeof(*info), GFP_KERNEL);
8046 	if (!info) {
8047 		trace_array_put(tr);
8048 		return -ENOMEM;
8049 	}
8050 
8051 	mutex_lock(&trace_types_lock);
8052 
8053 	info->iter.tr		= tr;
8054 	info->iter.cpu_file	= tracing_get_cpu(inode);
8055 	info->iter.trace	= tr->current_trace;
8056 	info->iter.array_buffer = &tr->array_buffer;
8057 	info->spare		= NULL;
8058 	/* Force reading ring buffer for first read */
8059 	info->read		= (unsigned int)-1;
8060 
8061 	filp->private_data = info;
8062 
8063 	tr->trace_ref++;
8064 
8065 	mutex_unlock(&trace_types_lock);
8066 
8067 	ret = nonseekable_open(inode, filp);
8068 	if (ret < 0)
8069 		trace_array_put(tr);
8070 
8071 	return ret;
8072 }
8073 
8074 static __poll_t
8075 tracing_buffers_poll(struct file *filp, poll_table *poll_table)
8076 {
8077 	struct ftrace_buffer_info *info = filp->private_data;
8078 	struct trace_iterator *iter = &info->iter;
8079 
8080 	return trace_poll(iter, filp, poll_table);
8081 }
8082 
8083 static ssize_t
8084 tracing_buffers_read(struct file *filp, char __user *ubuf,
8085 		     size_t count, loff_t *ppos)
8086 {
8087 	struct ftrace_buffer_info *info = filp->private_data;
8088 	struct trace_iterator *iter = &info->iter;
8089 	void *trace_data;
8090 	int page_size;
8091 	ssize_t ret = 0;
8092 	ssize_t size;
8093 
8094 	if (!count)
8095 		return 0;
8096 
8097 #ifdef CONFIG_TRACER_MAX_TRACE
8098 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8099 		return -EBUSY;
8100 #endif
8101 
8102 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8103 
8104 	/* Make sure the spare matches the current sub buffer size */
8105 	if (info->spare) {
8106 		if (page_size != info->spare_size) {
8107 			ring_buffer_free_read_page(iter->array_buffer->buffer,
8108 						   info->spare_cpu, info->spare);
8109 			info->spare = NULL;
8110 		}
8111 	}
8112 
8113 	if (!info->spare) {
8114 		info->spare = ring_buffer_alloc_read_page(iter->array_buffer->buffer,
8115 							  iter->cpu_file);
8116 		if (IS_ERR(info->spare)) {
8117 			ret = PTR_ERR(info->spare);
8118 			info->spare = NULL;
8119 		} else {
8120 			info->spare_cpu = iter->cpu_file;
8121 			info->spare_size = page_size;
8122 		}
8123 	}
8124 	if (!info->spare)
8125 		return ret;
8126 
8127 	/* Do we have previous read data to read? */
8128 	if (info->read < page_size)
8129 		goto read;
8130 
8131  again:
8132 	trace_access_lock(iter->cpu_file);
8133 	ret = ring_buffer_read_page(iter->array_buffer->buffer,
8134 				    info->spare,
8135 				    count,
8136 				    iter->cpu_file, 0);
8137 	trace_access_unlock(iter->cpu_file);
8138 
8139 	if (ret < 0) {
8140 		if (trace_empty(iter) && !iter->closed) {
8141 			if ((filp->f_flags & O_NONBLOCK))
8142 				return -EAGAIN;
8143 
8144 			ret = wait_on_pipe(iter, 0);
8145 			if (ret)
8146 				return ret;
8147 
8148 			goto again;
8149 		}
8150 		return 0;
8151 	}
8152 
8153 	info->read = 0;
8154  read:
8155 	size = page_size - info->read;
8156 	if (size > count)
8157 		size = count;
8158 	trace_data = ring_buffer_read_page_data(info->spare);
8159 	ret = copy_to_user(ubuf, trace_data + info->read, size);
8160 	if (ret == size)
8161 		return -EFAULT;
8162 
8163 	size -= ret;
8164 
8165 	*ppos += size;
8166 	info->read += size;
8167 
8168 	return size;
8169 }
8170 
8171 static int tracing_buffers_flush(struct file *file, fl_owner_t id)
8172 {
8173 	struct ftrace_buffer_info *info = file->private_data;
8174 	struct trace_iterator *iter = &info->iter;
8175 
8176 	iter->closed = true;
8177 	/* Make sure the waiters see the new wait_index */
8178 	(void)atomic_fetch_inc_release(&iter->wait_index);
8179 
8180 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8181 
8182 	return 0;
8183 }
8184 
8185 static int tracing_buffers_release(struct inode *inode, struct file *file)
8186 {
8187 	struct ftrace_buffer_info *info = file->private_data;
8188 	struct trace_iterator *iter = &info->iter;
8189 
8190 	mutex_lock(&trace_types_lock);
8191 
8192 	iter->tr->trace_ref--;
8193 
8194 	__trace_array_put(iter->tr);
8195 
8196 	if (info->spare)
8197 		ring_buffer_free_read_page(iter->array_buffer->buffer,
8198 					   info->spare_cpu, info->spare);
8199 	kvfree(info);
8200 
8201 	mutex_unlock(&trace_types_lock);
8202 
8203 	return 0;
8204 }
8205 
8206 struct buffer_ref {
8207 	struct trace_buffer	*buffer;
8208 	void			*page;
8209 	int			cpu;
8210 	refcount_t		refcount;
8211 };
8212 
8213 static void buffer_ref_release(struct buffer_ref *ref)
8214 {
8215 	if (!refcount_dec_and_test(&ref->refcount))
8216 		return;
8217 	ring_buffer_free_read_page(ref->buffer, ref->cpu, ref->page);
8218 	kfree(ref);
8219 }
8220 
8221 static void buffer_pipe_buf_release(struct pipe_inode_info *pipe,
8222 				    struct pipe_buffer *buf)
8223 {
8224 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8225 
8226 	buffer_ref_release(ref);
8227 	buf->private = 0;
8228 }
8229 
8230 static bool buffer_pipe_buf_get(struct pipe_inode_info *pipe,
8231 				struct pipe_buffer *buf)
8232 {
8233 	struct buffer_ref *ref = (struct buffer_ref *)buf->private;
8234 
8235 	if (refcount_read(&ref->refcount) > INT_MAX/2)
8236 		return false;
8237 
8238 	refcount_inc(&ref->refcount);
8239 	return true;
8240 }
8241 
8242 /* Pipe buffer operations for a buffer. */
8243 static const struct pipe_buf_operations buffer_pipe_buf_ops = {
8244 	.release		= buffer_pipe_buf_release,
8245 	.get			= buffer_pipe_buf_get,
8246 };
8247 
8248 /*
8249  * Callback from splice_to_pipe(), if we need to release some pages
8250  * at the end of the spd in case we error'ed out in filling the pipe.
8251  */
8252 static void buffer_spd_release(struct splice_pipe_desc *spd, unsigned int i)
8253 {
8254 	struct buffer_ref *ref =
8255 		(struct buffer_ref *)spd->partial[i].private;
8256 
8257 	buffer_ref_release(ref);
8258 	spd->partial[i].private = 0;
8259 }
8260 
8261 static ssize_t
8262 tracing_buffers_splice_read(struct file *file, loff_t *ppos,
8263 			    struct pipe_inode_info *pipe, size_t len,
8264 			    unsigned int flags)
8265 {
8266 	struct ftrace_buffer_info *info = file->private_data;
8267 	struct trace_iterator *iter = &info->iter;
8268 	struct partial_page partial_def[PIPE_DEF_BUFFERS];
8269 	struct page *pages_def[PIPE_DEF_BUFFERS];
8270 	struct splice_pipe_desc spd = {
8271 		.pages		= pages_def,
8272 		.partial	= partial_def,
8273 		.nr_pages_max	= PIPE_DEF_BUFFERS,
8274 		.ops		= &buffer_pipe_buf_ops,
8275 		.spd_release	= buffer_spd_release,
8276 	};
8277 	struct buffer_ref *ref;
8278 	bool woken = false;
8279 	int page_size;
8280 	int entries, i;
8281 	ssize_t ret = 0;
8282 
8283 #ifdef CONFIG_TRACER_MAX_TRACE
8284 	if (iter->snapshot && iter->tr->current_trace->use_max_tr)
8285 		return -EBUSY;
8286 #endif
8287 
8288 	page_size = ring_buffer_subbuf_size_get(iter->array_buffer->buffer);
8289 	if (*ppos & (page_size - 1))
8290 		return -EINVAL;
8291 
8292 	if (len & (page_size - 1)) {
8293 		if (len < page_size)
8294 			return -EINVAL;
8295 		len &= (~(page_size - 1));
8296 	}
8297 
8298 	if (splice_grow_spd(pipe, &spd))
8299 		return -ENOMEM;
8300 
8301  again:
8302 	trace_access_lock(iter->cpu_file);
8303 	entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8304 
8305 	for (i = 0; i < spd.nr_pages_max && len && entries; i++, len -= page_size) {
8306 		struct page *page;
8307 		int r;
8308 
8309 		ref = kzalloc(sizeof(*ref), GFP_KERNEL);
8310 		if (!ref) {
8311 			ret = -ENOMEM;
8312 			break;
8313 		}
8314 
8315 		refcount_set(&ref->refcount, 1);
8316 		ref->buffer = iter->array_buffer->buffer;
8317 		ref->page = ring_buffer_alloc_read_page(ref->buffer, iter->cpu_file);
8318 		if (IS_ERR(ref->page)) {
8319 			ret = PTR_ERR(ref->page);
8320 			ref->page = NULL;
8321 			kfree(ref);
8322 			break;
8323 		}
8324 		ref->cpu = iter->cpu_file;
8325 
8326 		r = ring_buffer_read_page(ref->buffer, ref->page,
8327 					  len, iter->cpu_file, 1);
8328 		if (r < 0) {
8329 			ring_buffer_free_read_page(ref->buffer, ref->cpu,
8330 						   ref->page);
8331 			kfree(ref);
8332 			break;
8333 		}
8334 
8335 		page = virt_to_page(ring_buffer_read_page_data(ref->page));
8336 
8337 		spd.pages[i] = page;
8338 		spd.partial[i].len = page_size;
8339 		spd.partial[i].offset = 0;
8340 		spd.partial[i].private = (unsigned long)ref;
8341 		spd.nr_pages++;
8342 		*ppos += page_size;
8343 
8344 		entries = ring_buffer_entries_cpu(iter->array_buffer->buffer, iter->cpu_file);
8345 	}
8346 
8347 	trace_access_unlock(iter->cpu_file);
8348 	spd.nr_pages = i;
8349 
8350 	/* did we read anything? */
8351 	if (!spd.nr_pages) {
8352 
8353 		if (ret)
8354 			goto out;
8355 
8356 		if (woken)
8357 			goto out;
8358 
8359 		ret = -EAGAIN;
8360 		if ((file->f_flags & O_NONBLOCK) || (flags & SPLICE_F_NONBLOCK))
8361 			goto out;
8362 
8363 		ret = wait_on_pipe(iter, iter->snapshot ? 0 : iter->tr->buffer_percent);
8364 		if (ret)
8365 			goto out;
8366 
8367 		/* No need to wait after waking up when tracing is off */
8368 		if (!tracer_tracing_is_on(iter->tr))
8369 			goto out;
8370 
8371 		/* Iterate one more time to collect any new data then exit */
8372 		woken = true;
8373 
8374 		goto again;
8375 	}
8376 
8377 	ret = splice_to_pipe(pipe, &spd);
8378 out:
8379 	splice_shrink_spd(&spd);
8380 
8381 	return ret;
8382 }
8383 
8384 static long tracing_buffers_ioctl(struct file *file, unsigned int cmd, unsigned long arg)
8385 {
8386 	struct ftrace_buffer_info *info = file->private_data;
8387 	struct trace_iterator *iter = &info->iter;
8388 	int err;
8389 
8390 	if (cmd == TRACE_MMAP_IOCTL_GET_READER) {
8391 		if (!(file->f_flags & O_NONBLOCK)) {
8392 			err = ring_buffer_wait(iter->array_buffer->buffer,
8393 					       iter->cpu_file,
8394 					       iter->tr->buffer_percent,
8395 					       NULL, NULL);
8396 			if (err)
8397 				return err;
8398 		}
8399 
8400 		return ring_buffer_map_get_reader(iter->array_buffer->buffer,
8401 						  iter->cpu_file);
8402 	} else if (cmd) {
8403 		return -ENOTTY;
8404 	}
8405 
8406 	/*
8407 	 * An ioctl call with cmd 0 to the ring buffer file will wake up all
8408 	 * waiters
8409 	 */
8410 	mutex_lock(&trace_types_lock);
8411 
8412 	/* Make sure the waiters see the new wait_index */
8413 	(void)atomic_fetch_inc_release(&iter->wait_index);
8414 
8415 	ring_buffer_wake_waiters(iter->array_buffer->buffer, iter->cpu_file);
8416 
8417 	mutex_unlock(&trace_types_lock);
8418 	return 0;
8419 }
8420 
8421 #ifdef CONFIG_TRACER_MAX_TRACE
8422 static int get_snapshot_map(struct trace_array *tr)
8423 {
8424 	int err = 0;
8425 
8426 	/*
8427 	 * Called with mmap_lock held. lockdep would be unhappy if we would now
8428 	 * take trace_types_lock. Instead use the specific
8429 	 * snapshot_trigger_lock.
8430 	 */
8431 	spin_lock(&tr->snapshot_trigger_lock);
8432 
8433 	if (tr->snapshot || tr->mapped == UINT_MAX)
8434 		err = -EBUSY;
8435 	else
8436 		tr->mapped++;
8437 
8438 	spin_unlock(&tr->snapshot_trigger_lock);
8439 
8440 	/* Wait for update_max_tr() to observe iter->tr->mapped */
8441 	if (tr->mapped == 1)
8442 		synchronize_rcu();
8443 
8444 	return err;
8445 
8446 }
8447 static void put_snapshot_map(struct trace_array *tr)
8448 {
8449 	spin_lock(&tr->snapshot_trigger_lock);
8450 	if (!WARN_ON(!tr->mapped))
8451 		tr->mapped--;
8452 	spin_unlock(&tr->snapshot_trigger_lock);
8453 }
8454 #else
8455 static inline int get_snapshot_map(struct trace_array *tr) { return 0; }
8456 static inline void put_snapshot_map(struct trace_array *tr) { }
8457 #endif
8458 
8459 static void tracing_buffers_mmap_close(struct vm_area_struct *vma)
8460 {
8461 	struct ftrace_buffer_info *info = vma->vm_file->private_data;
8462 	struct trace_iterator *iter = &info->iter;
8463 
8464 	WARN_ON(ring_buffer_unmap(iter->array_buffer->buffer, iter->cpu_file));
8465 	put_snapshot_map(iter->tr);
8466 }
8467 
8468 static const struct vm_operations_struct tracing_buffers_vmops = {
8469 	.close		= tracing_buffers_mmap_close,
8470 };
8471 
8472 static int tracing_buffers_mmap(struct file *filp, struct vm_area_struct *vma)
8473 {
8474 	struct ftrace_buffer_info *info = filp->private_data;
8475 	struct trace_iterator *iter = &info->iter;
8476 	int ret = 0;
8477 
8478 	ret = get_snapshot_map(iter->tr);
8479 	if (ret)
8480 		return ret;
8481 
8482 	ret = ring_buffer_map(iter->array_buffer->buffer, iter->cpu_file, vma);
8483 	if (ret)
8484 		put_snapshot_map(iter->tr);
8485 
8486 	vma->vm_ops = &tracing_buffers_vmops;
8487 
8488 	return ret;
8489 }
8490 
8491 static const struct file_operations tracing_buffers_fops = {
8492 	.open		= tracing_buffers_open,
8493 	.read		= tracing_buffers_read,
8494 	.poll		= tracing_buffers_poll,
8495 	.release	= tracing_buffers_release,
8496 	.flush		= tracing_buffers_flush,
8497 	.splice_read	= tracing_buffers_splice_read,
8498 	.unlocked_ioctl = tracing_buffers_ioctl,
8499 	.mmap		= tracing_buffers_mmap,
8500 };
8501 
8502 static ssize_t
8503 tracing_stats_read(struct file *filp, char __user *ubuf,
8504 		   size_t count, loff_t *ppos)
8505 {
8506 	struct inode *inode = file_inode(filp);
8507 	struct trace_array *tr = inode->i_private;
8508 	struct array_buffer *trace_buf = &tr->array_buffer;
8509 	int cpu = tracing_get_cpu(inode);
8510 	struct trace_seq *s;
8511 	unsigned long cnt;
8512 	unsigned long long t;
8513 	unsigned long usec_rem;
8514 
8515 	s = kmalloc(sizeof(*s), GFP_KERNEL);
8516 	if (!s)
8517 		return -ENOMEM;
8518 
8519 	trace_seq_init(s);
8520 
8521 	cnt = ring_buffer_entries_cpu(trace_buf->buffer, cpu);
8522 	trace_seq_printf(s, "entries: %ld\n", cnt);
8523 
8524 	cnt = ring_buffer_overrun_cpu(trace_buf->buffer, cpu);
8525 	trace_seq_printf(s, "overrun: %ld\n", cnt);
8526 
8527 	cnt = ring_buffer_commit_overrun_cpu(trace_buf->buffer, cpu);
8528 	trace_seq_printf(s, "commit overrun: %ld\n", cnt);
8529 
8530 	cnt = ring_buffer_bytes_cpu(trace_buf->buffer, cpu);
8531 	trace_seq_printf(s, "bytes: %ld\n", cnt);
8532 
8533 	if (trace_clocks[tr->clock_id].in_ns) {
8534 		/* local or global for trace_clock */
8535 		t = ns2usecs(ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8536 		usec_rem = do_div(t, USEC_PER_SEC);
8537 		trace_seq_printf(s, "oldest event ts: %5llu.%06lu\n",
8538 								t, usec_rem);
8539 
8540 		t = ns2usecs(ring_buffer_time_stamp(trace_buf->buffer));
8541 		usec_rem = do_div(t, USEC_PER_SEC);
8542 		trace_seq_printf(s, "now ts: %5llu.%06lu\n", t, usec_rem);
8543 	} else {
8544 		/* counter or tsc mode for trace_clock */
8545 		trace_seq_printf(s, "oldest event ts: %llu\n",
8546 				ring_buffer_oldest_event_ts(trace_buf->buffer, cpu));
8547 
8548 		trace_seq_printf(s, "now ts: %llu\n",
8549 				ring_buffer_time_stamp(trace_buf->buffer));
8550 	}
8551 
8552 	cnt = ring_buffer_dropped_events_cpu(trace_buf->buffer, cpu);
8553 	trace_seq_printf(s, "dropped events: %ld\n", cnt);
8554 
8555 	cnt = ring_buffer_read_events_cpu(trace_buf->buffer, cpu);
8556 	trace_seq_printf(s, "read events: %ld\n", cnt);
8557 
8558 	count = simple_read_from_buffer(ubuf, count, ppos,
8559 					s->buffer, trace_seq_used(s));
8560 
8561 	kfree(s);
8562 
8563 	return count;
8564 }
8565 
8566 static const struct file_operations tracing_stats_fops = {
8567 	.open		= tracing_open_generic_tr,
8568 	.read		= tracing_stats_read,
8569 	.llseek		= generic_file_llseek,
8570 	.release	= tracing_release_generic_tr,
8571 };
8572 
8573 #ifdef CONFIG_DYNAMIC_FTRACE
8574 
8575 static ssize_t
8576 tracing_read_dyn_info(struct file *filp, char __user *ubuf,
8577 		  size_t cnt, loff_t *ppos)
8578 {
8579 	ssize_t ret;
8580 	char *buf;
8581 	int r;
8582 
8583 	/* 512 should be plenty to hold the amount needed */
8584 #define DYN_INFO_BUF_SIZE	512
8585 
8586 	buf = kmalloc(DYN_INFO_BUF_SIZE, GFP_KERNEL);
8587 	if (!buf)
8588 		return -ENOMEM;
8589 
8590 	r = scnprintf(buf, DYN_INFO_BUF_SIZE,
8591 		      "%ld pages:%ld groups: %ld\n"
8592 		      "ftrace boot update time = %llu (ns)\n"
8593 		      "ftrace module total update time = %llu (ns)\n",
8594 		      ftrace_update_tot_cnt,
8595 		      ftrace_number_of_pages,
8596 		      ftrace_number_of_groups,
8597 		      ftrace_update_time,
8598 		      ftrace_total_mod_time);
8599 
8600 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
8601 	kfree(buf);
8602 	return ret;
8603 }
8604 
8605 static const struct file_operations tracing_dyn_info_fops = {
8606 	.open		= tracing_open_generic,
8607 	.read		= tracing_read_dyn_info,
8608 	.llseek		= generic_file_llseek,
8609 };
8610 #endif /* CONFIG_DYNAMIC_FTRACE */
8611 
8612 #if defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE)
8613 static void
8614 ftrace_snapshot(unsigned long ip, unsigned long parent_ip,
8615 		struct trace_array *tr, struct ftrace_probe_ops *ops,
8616 		void *data)
8617 {
8618 	tracing_snapshot_instance(tr);
8619 }
8620 
8621 static void
8622 ftrace_count_snapshot(unsigned long ip, unsigned long parent_ip,
8623 		      struct trace_array *tr, struct ftrace_probe_ops *ops,
8624 		      void *data)
8625 {
8626 	struct ftrace_func_mapper *mapper = data;
8627 	long *count = NULL;
8628 
8629 	if (mapper)
8630 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8631 
8632 	if (count) {
8633 
8634 		if (*count <= 0)
8635 			return;
8636 
8637 		(*count)--;
8638 	}
8639 
8640 	tracing_snapshot_instance(tr);
8641 }
8642 
8643 static int
8644 ftrace_snapshot_print(struct seq_file *m, unsigned long ip,
8645 		      struct ftrace_probe_ops *ops, void *data)
8646 {
8647 	struct ftrace_func_mapper *mapper = data;
8648 	long *count = NULL;
8649 
8650 	seq_printf(m, "%ps:", (void *)ip);
8651 
8652 	seq_puts(m, "snapshot");
8653 
8654 	if (mapper)
8655 		count = (long *)ftrace_func_mapper_find_ip(mapper, ip);
8656 
8657 	if (count)
8658 		seq_printf(m, ":count=%ld\n", *count);
8659 	else
8660 		seq_puts(m, ":unlimited\n");
8661 
8662 	return 0;
8663 }
8664 
8665 static int
8666 ftrace_snapshot_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
8667 		     unsigned long ip, void *init_data, void **data)
8668 {
8669 	struct ftrace_func_mapper *mapper = *data;
8670 
8671 	if (!mapper) {
8672 		mapper = allocate_ftrace_func_mapper();
8673 		if (!mapper)
8674 			return -ENOMEM;
8675 		*data = mapper;
8676 	}
8677 
8678 	return ftrace_func_mapper_add_ip(mapper, ip, init_data);
8679 }
8680 
8681 static void
8682 ftrace_snapshot_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
8683 		     unsigned long ip, void *data)
8684 {
8685 	struct ftrace_func_mapper *mapper = data;
8686 
8687 	if (!ip) {
8688 		if (!mapper)
8689 			return;
8690 		free_ftrace_func_mapper(mapper, NULL);
8691 		return;
8692 	}
8693 
8694 	ftrace_func_mapper_remove_ip(mapper, ip);
8695 }
8696 
8697 static struct ftrace_probe_ops snapshot_probe_ops = {
8698 	.func			= ftrace_snapshot,
8699 	.print			= ftrace_snapshot_print,
8700 };
8701 
8702 static struct ftrace_probe_ops snapshot_count_probe_ops = {
8703 	.func			= ftrace_count_snapshot,
8704 	.print			= ftrace_snapshot_print,
8705 	.init			= ftrace_snapshot_init,
8706 	.free			= ftrace_snapshot_free,
8707 };
8708 
8709 static int
8710 ftrace_trace_snapshot_callback(struct trace_array *tr, struct ftrace_hash *hash,
8711 			       char *glob, char *cmd, char *param, int enable)
8712 {
8713 	struct ftrace_probe_ops *ops;
8714 	void *count = (void *)-1;
8715 	char *number;
8716 	int ret;
8717 
8718 	if (!tr)
8719 		return -ENODEV;
8720 
8721 	/* hash funcs only work with set_ftrace_filter */
8722 	if (!enable)
8723 		return -EINVAL;
8724 
8725 	ops = param ? &snapshot_count_probe_ops :  &snapshot_probe_ops;
8726 
8727 	if (glob[0] == '!') {
8728 		ret = unregister_ftrace_function_probe_func(glob+1, tr, ops);
8729 		if (!ret)
8730 			tracing_disarm_snapshot(tr);
8731 
8732 		return ret;
8733 	}
8734 
8735 	if (!param)
8736 		goto out_reg;
8737 
8738 	number = strsep(&param, ":");
8739 
8740 	if (!strlen(number))
8741 		goto out_reg;
8742 
8743 	/*
8744 	 * We use the callback data field (which is a pointer)
8745 	 * as our counter.
8746 	 */
8747 	ret = kstrtoul(number, 0, (unsigned long *)&count);
8748 	if (ret)
8749 		return ret;
8750 
8751  out_reg:
8752 	ret = tracing_arm_snapshot(tr);
8753 	if (ret < 0)
8754 		goto out;
8755 
8756 	ret = register_ftrace_function_probe(glob, tr, ops, count);
8757 	if (ret < 0)
8758 		tracing_disarm_snapshot(tr);
8759  out:
8760 	return ret < 0 ? ret : 0;
8761 }
8762 
8763 static struct ftrace_func_command ftrace_snapshot_cmd = {
8764 	.name			= "snapshot",
8765 	.func			= ftrace_trace_snapshot_callback,
8766 };
8767 
8768 static __init int register_snapshot_cmd(void)
8769 {
8770 	return register_ftrace_command(&ftrace_snapshot_cmd);
8771 }
8772 #else
8773 static inline __init int register_snapshot_cmd(void) { return 0; }
8774 #endif /* defined(CONFIG_TRACER_SNAPSHOT) && defined(CONFIG_DYNAMIC_FTRACE) */
8775 
8776 static struct dentry *tracing_get_dentry(struct trace_array *tr)
8777 {
8778 	if (WARN_ON(!tr->dir))
8779 		return ERR_PTR(-ENODEV);
8780 
8781 	/* Top directory uses NULL as the parent */
8782 	if (tr->flags & TRACE_ARRAY_FL_GLOBAL)
8783 		return NULL;
8784 
8785 	/* All sub buffers have a descriptor */
8786 	return tr->dir;
8787 }
8788 
8789 static struct dentry *tracing_dentry_percpu(struct trace_array *tr, int cpu)
8790 {
8791 	struct dentry *d_tracer;
8792 
8793 	if (tr->percpu_dir)
8794 		return tr->percpu_dir;
8795 
8796 	d_tracer = tracing_get_dentry(tr);
8797 	if (IS_ERR(d_tracer))
8798 		return NULL;
8799 
8800 	tr->percpu_dir = tracefs_create_dir("per_cpu", d_tracer);
8801 
8802 	MEM_FAIL(!tr->percpu_dir,
8803 		  "Could not create tracefs directory 'per_cpu/%d'\n", cpu);
8804 
8805 	return tr->percpu_dir;
8806 }
8807 
8808 static struct dentry *
8809 trace_create_cpu_file(const char *name, umode_t mode, struct dentry *parent,
8810 		      void *data, long cpu, const struct file_operations *fops)
8811 {
8812 	struct dentry *ret = trace_create_file(name, mode, parent, data, fops);
8813 
8814 	if (ret) /* See tracing_get_cpu() */
8815 		d_inode(ret)->i_cdev = (void *)(cpu + 1);
8816 	return ret;
8817 }
8818 
8819 static void
8820 tracing_init_tracefs_percpu(struct trace_array *tr, long cpu)
8821 {
8822 	struct dentry *d_percpu = tracing_dentry_percpu(tr, cpu);
8823 	struct dentry *d_cpu;
8824 	char cpu_dir[30]; /* 30 characters should be more than enough */
8825 
8826 	if (!d_percpu)
8827 		return;
8828 
8829 	snprintf(cpu_dir, 30, "cpu%ld", cpu);
8830 	d_cpu = tracefs_create_dir(cpu_dir, d_percpu);
8831 	if (!d_cpu) {
8832 		pr_warn("Could not create tracefs '%s' entry\n", cpu_dir);
8833 		return;
8834 	}
8835 
8836 	/* per cpu trace_pipe */
8837 	trace_create_cpu_file("trace_pipe", TRACE_MODE_READ, d_cpu,
8838 				tr, cpu, &tracing_pipe_fops);
8839 
8840 	/* per cpu trace */
8841 	trace_create_cpu_file("trace", TRACE_MODE_WRITE, d_cpu,
8842 				tr, cpu, &tracing_fops);
8843 
8844 	trace_create_cpu_file("trace_pipe_raw", TRACE_MODE_READ, d_cpu,
8845 				tr, cpu, &tracing_buffers_fops);
8846 
8847 	trace_create_cpu_file("stats", TRACE_MODE_READ, d_cpu,
8848 				tr, cpu, &tracing_stats_fops);
8849 
8850 	trace_create_cpu_file("buffer_size_kb", TRACE_MODE_READ, d_cpu,
8851 				tr, cpu, &tracing_entries_fops);
8852 
8853 	if (tr->range_addr_start)
8854 		trace_create_cpu_file("buffer_meta", TRACE_MODE_READ, d_cpu,
8855 				      tr, cpu, &tracing_buffer_meta_fops);
8856 #ifdef CONFIG_TRACER_SNAPSHOT
8857 	if (!tr->range_addr_start) {
8858 		trace_create_cpu_file("snapshot", TRACE_MODE_WRITE, d_cpu,
8859 				      tr, cpu, &snapshot_fops);
8860 
8861 		trace_create_cpu_file("snapshot_raw", TRACE_MODE_READ, d_cpu,
8862 				      tr, cpu, &snapshot_raw_fops);
8863 	}
8864 #endif
8865 }
8866 
8867 #ifdef CONFIG_FTRACE_SELFTEST
8868 /* Let selftest have access to static functions in this file */
8869 #include "trace_selftest.c"
8870 #endif
8871 
8872 static ssize_t
8873 trace_options_read(struct file *filp, char __user *ubuf, size_t cnt,
8874 			loff_t *ppos)
8875 {
8876 	struct trace_option_dentry *topt = filp->private_data;
8877 	char *buf;
8878 
8879 	if (topt->flags->val & topt->opt->bit)
8880 		buf = "1\n";
8881 	else
8882 		buf = "0\n";
8883 
8884 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8885 }
8886 
8887 static ssize_t
8888 trace_options_write(struct file *filp, const char __user *ubuf, size_t cnt,
8889 			 loff_t *ppos)
8890 {
8891 	struct trace_option_dentry *topt = filp->private_data;
8892 	unsigned long val;
8893 	int ret;
8894 
8895 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
8896 	if (ret)
8897 		return ret;
8898 
8899 	if (val != 0 && val != 1)
8900 		return -EINVAL;
8901 
8902 	if (!!(topt->flags->val & topt->opt->bit) != val) {
8903 		mutex_lock(&trace_types_lock);
8904 		ret = __set_tracer_option(topt->tr, topt->flags,
8905 					  topt->opt, !val);
8906 		mutex_unlock(&trace_types_lock);
8907 		if (ret)
8908 			return ret;
8909 	}
8910 
8911 	*ppos += cnt;
8912 
8913 	return cnt;
8914 }
8915 
8916 static int tracing_open_options(struct inode *inode, struct file *filp)
8917 {
8918 	struct trace_option_dentry *topt = inode->i_private;
8919 	int ret;
8920 
8921 	ret = tracing_check_open_get_tr(topt->tr);
8922 	if (ret)
8923 		return ret;
8924 
8925 	filp->private_data = inode->i_private;
8926 	return 0;
8927 }
8928 
8929 static int tracing_release_options(struct inode *inode, struct file *file)
8930 {
8931 	struct trace_option_dentry *topt = file->private_data;
8932 
8933 	trace_array_put(topt->tr);
8934 	return 0;
8935 }
8936 
8937 static const struct file_operations trace_options_fops = {
8938 	.open = tracing_open_options,
8939 	.read = trace_options_read,
8940 	.write = trace_options_write,
8941 	.llseek	= generic_file_llseek,
8942 	.release = tracing_release_options,
8943 };
8944 
8945 /*
8946  * In order to pass in both the trace_array descriptor as well as the index
8947  * to the flag that the trace option file represents, the trace_array
8948  * has a character array of trace_flags_index[], which holds the index
8949  * of the bit for the flag it represents. index[0] == 0, index[1] == 1, etc.
8950  * The address of this character array is passed to the flag option file
8951  * read/write callbacks.
8952  *
8953  * In order to extract both the index and the trace_array descriptor,
8954  * get_tr_index() uses the following algorithm.
8955  *
8956  *   idx = *ptr;
8957  *
8958  * As the pointer itself contains the address of the index (remember
8959  * index[1] == 1).
8960  *
8961  * Then to get the trace_array descriptor, by subtracting that index
8962  * from the ptr, we get to the start of the index itself.
8963  *
8964  *   ptr - idx == &index[0]
8965  *
8966  * Then a simple container_of() from that pointer gets us to the
8967  * trace_array descriptor.
8968  */
8969 static void get_tr_index(void *data, struct trace_array **ptr,
8970 			 unsigned int *pindex)
8971 {
8972 	*pindex = *(unsigned char *)data;
8973 
8974 	*ptr = container_of(data - *pindex, struct trace_array,
8975 			    trace_flags_index);
8976 }
8977 
8978 static ssize_t
8979 trace_options_core_read(struct file *filp, char __user *ubuf, size_t cnt,
8980 			loff_t *ppos)
8981 {
8982 	void *tr_index = filp->private_data;
8983 	struct trace_array *tr;
8984 	unsigned int index;
8985 	char *buf;
8986 
8987 	get_tr_index(tr_index, &tr, &index);
8988 
8989 	if (tr->trace_flags & (1 << index))
8990 		buf = "1\n";
8991 	else
8992 		buf = "0\n";
8993 
8994 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
8995 }
8996 
8997 static ssize_t
8998 trace_options_core_write(struct file *filp, const char __user *ubuf, size_t cnt,
8999 			 loff_t *ppos)
9000 {
9001 	void *tr_index = filp->private_data;
9002 	struct trace_array *tr;
9003 	unsigned int index;
9004 	unsigned long val;
9005 	int ret;
9006 
9007 	get_tr_index(tr_index, &tr, &index);
9008 
9009 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9010 	if (ret)
9011 		return ret;
9012 
9013 	if (val != 0 && val != 1)
9014 		return -EINVAL;
9015 
9016 	mutex_lock(&event_mutex);
9017 	mutex_lock(&trace_types_lock);
9018 	ret = set_tracer_flag(tr, 1 << index, val);
9019 	mutex_unlock(&trace_types_lock);
9020 	mutex_unlock(&event_mutex);
9021 
9022 	if (ret < 0)
9023 		return ret;
9024 
9025 	*ppos += cnt;
9026 
9027 	return cnt;
9028 }
9029 
9030 static const struct file_operations trace_options_core_fops = {
9031 	.open = tracing_open_generic,
9032 	.read = trace_options_core_read,
9033 	.write = trace_options_core_write,
9034 	.llseek = generic_file_llseek,
9035 };
9036 
9037 struct dentry *trace_create_file(const char *name,
9038 				 umode_t mode,
9039 				 struct dentry *parent,
9040 				 void *data,
9041 				 const struct file_operations *fops)
9042 {
9043 	struct dentry *ret;
9044 
9045 	ret = tracefs_create_file(name, mode, parent, data, fops);
9046 	if (!ret)
9047 		pr_warn("Could not create tracefs '%s' entry\n", name);
9048 
9049 	return ret;
9050 }
9051 
9052 
9053 static struct dentry *trace_options_init_dentry(struct trace_array *tr)
9054 {
9055 	struct dentry *d_tracer;
9056 
9057 	if (tr->options)
9058 		return tr->options;
9059 
9060 	d_tracer = tracing_get_dentry(tr);
9061 	if (IS_ERR(d_tracer))
9062 		return NULL;
9063 
9064 	tr->options = tracefs_create_dir("options", d_tracer);
9065 	if (!tr->options) {
9066 		pr_warn("Could not create tracefs directory 'options'\n");
9067 		return NULL;
9068 	}
9069 
9070 	return tr->options;
9071 }
9072 
9073 static void
9074 create_trace_option_file(struct trace_array *tr,
9075 			 struct trace_option_dentry *topt,
9076 			 struct tracer_flags *flags,
9077 			 struct tracer_opt *opt)
9078 {
9079 	struct dentry *t_options;
9080 
9081 	t_options = trace_options_init_dentry(tr);
9082 	if (!t_options)
9083 		return;
9084 
9085 	topt->flags = flags;
9086 	topt->opt = opt;
9087 	topt->tr = tr;
9088 
9089 	topt->entry = trace_create_file(opt->name, TRACE_MODE_WRITE,
9090 					t_options, topt, &trace_options_fops);
9091 
9092 }
9093 
9094 static void
9095 create_trace_option_files(struct trace_array *tr, struct tracer *tracer)
9096 {
9097 	struct trace_option_dentry *topts;
9098 	struct trace_options *tr_topts;
9099 	struct tracer_flags *flags;
9100 	struct tracer_opt *opts;
9101 	int cnt;
9102 	int i;
9103 
9104 	if (!tracer)
9105 		return;
9106 
9107 	flags = tracer->flags;
9108 
9109 	if (!flags || !flags->opts)
9110 		return;
9111 
9112 	/*
9113 	 * If this is an instance, only create flags for tracers
9114 	 * the instance may have.
9115 	 */
9116 	if (!trace_ok_for_array(tracer, tr))
9117 		return;
9118 
9119 	for (i = 0; i < tr->nr_topts; i++) {
9120 		/* Make sure there's no duplicate flags. */
9121 		if (WARN_ON_ONCE(tr->topts[i].tracer->flags == tracer->flags))
9122 			return;
9123 	}
9124 
9125 	opts = flags->opts;
9126 
9127 	for (cnt = 0; opts[cnt].name; cnt++)
9128 		;
9129 
9130 	topts = kcalloc(cnt + 1, sizeof(*topts), GFP_KERNEL);
9131 	if (!topts)
9132 		return;
9133 
9134 	tr_topts = krealloc(tr->topts, sizeof(*tr->topts) * (tr->nr_topts + 1),
9135 			    GFP_KERNEL);
9136 	if (!tr_topts) {
9137 		kfree(topts);
9138 		return;
9139 	}
9140 
9141 	tr->topts = tr_topts;
9142 	tr->topts[tr->nr_topts].tracer = tracer;
9143 	tr->topts[tr->nr_topts].topts = topts;
9144 	tr->nr_topts++;
9145 
9146 	for (cnt = 0; opts[cnt].name; cnt++) {
9147 		create_trace_option_file(tr, &topts[cnt], flags,
9148 					 &opts[cnt]);
9149 		MEM_FAIL(topts[cnt].entry == NULL,
9150 			  "Failed to create trace option: %s",
9151 			  opts[cnt].name);
9152 	}
9153 }
9154 
9155 static struct dentry *
9156 create_trace_option_core_file(struct trace_array *tr,
9157 			      const char *option, long index)
9158 {
9159 	struct dentry *t_options;
9160 
9161 	t_options = trace_options_init_dentry(tr);
9162 	if (!t_options)
9163 		return NULL;
9164 
9165 	return trace_create_file(option, TRACE_MODE_WRITE, t_options,
9166 				 (void *)&tr->trace_flags_index[index],
9167 				 &trace_options_core_fops);
9168 }
9169 
9170 static void create_trace_options_dir(struct trace_array *tr)
9171 {
9172 	struct dentry *t_options;
9173 	bool top_level = tr == &global_trace;
9174 	int i;
9175 
9176 	t_options = trace_options_init_dentry(tr);
9177 	if (!t_options)
9178 		return;
9179 
9180 	for (i = 0; trace_options[i]; i++) {
9181 		if (top_level ||
9182 		    !((1 << i) & TOP_LEVEL_TRACE_FLAGS))
9183 			create_trace_option_core_file(tr, trace_options[i], i);
9184 	}
9185 }
9186 
9187 static ssize_t
9188 rb_simple_read(struct file *filp, char __user *ubuf,
9189 	       size_t cnt, loff_t *ppos)
9190 {
9191 	struct trace_array *tr = filp->private_data;
9192 	char buf[64];
9193 	int r;
9194 
9195 	r = tracer_tracing_is_on(tr);
9196 	r = sprintf(buf, "%d\n", r);
9197 
9198 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9199 }
9200 
9201 static ssize_t
9202 rb_simple_write(struct file *filp, const char __user *ubuf,
9203 		size_t cnt, loff_t *ppos)
9204 {
9205 	struct trace_array *tr = filp->private_data;
9206 	struct trace_buffer *buffer = tr->array_buffer.buffer;
9207 	unsigned long val;
9208 	int ret;
9209 
9210 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9211 	if (ret)
9212 		return ret;
9213 
9214 	if (buffer) {
9215 		mutex_lock(&trace_types_lock);
9216 		if (!!val == tracer_tracing_is_on(tr)) {
9217 			val = 0; /* do nothing */
9218 		} else if (val) {
9219 			tracer_tracing_on(tr);
9220 			if (tr->current_trace->start)
9221 				tr->current_trace->start(tr);
9222 		} else {
9223 			tracer_tracing_off(tr);
9224 			if (tr->current_trace->stop)
9225 				tr->current_trace->stop(tr);
9226 			/* Wake up any waiters */
9227 			ring_buffer_wake_waiters(buffer, RING_BUFFER_ALL_CPUS);
9228 		}
9229 		mutex_unlock(&trace_types_lock);
9230 	}
9231 
9232 	(*ppos)++;
9233 
9234 	return cnt;
9235 }
9236 
9237 static const struct file_operations rb_simple_fops = {
9238 	.open		= tracing_open_generic_tr,
9239 	.read		= rb_simple_read,
9240 	.write		= rb_simple_write,
9241 	.release	= tracing_release_generic_tr,
9242 	.llseek		= default_llseek,
9243 };
9244 
9245 static ssize_t
9246 buffer_percent_read(struct file *filp, char __user *ubuf,
9247 		    size_t cnt, loff_t *ppos)
9248 {
9249 	struct trace_array *tr = filp->private_data;
9250 	char buf[64];
9251 	int r;
9252 
9253 	r = tr->buffer_percent;
9254 	r = sprintf(buf, "%d\n", r);
9255 
9256 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9257 }
9258 
9259 static ssize_t
9260 buffer_percent_write(struct file *filp, const char __user *ubuf,
9261 		     size_t cnt, loff_t *ppos)
9262 {
9263 	struct trace_array *tr = filp->private_data;
9264 	unsigned long val;
9265 	int ret;
9266 
9267 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9268 	if (ret)
9269 		return ret;
9270 
9271 	if (val > 100)
9272 		return -EINVAL;
9273 
9274 	tr->buffer_percent = val;
9275 
9276 	(*ppos)++;
9277 
9278 	return cnt;
9279 }
9280 
9281 static const struct file_operations buffer_percent_fops = {
9282 	.open		= tracing_open_generic_tr,
9283 	.read		= buffer_percent_read,
9284 	.write		= buffer_percent_write,
9285 	.release	= tracing_release_generic_tr,
9286 	.llseek		= default_llseek,
9287 };
9288 
9289 static ssize_t
9290 buffer_subbuf_size_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
9291 {
9292 	struct trace_array *tr = filp->private_data;
9293 	size_t size;
9294 	char buf[64];
9295 	int order;
9296 	int r;
9297 
9298 	order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9299 	size = (PAGE_SIZE << order) / 1024;
9300 
9301 	r = sprintf(buf, "%zd\n", size);
9302 
9303 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, r);
9304 }
9305 
9306 static ssize_t
9307 buffer_subbuf_size_write(struct file *filp, const char __user *ubuf,
9308 			 size_t cnt, loff_t *ppos)
9309 {
9310 	struct trace_array *tr = filp->private_data;
9311 	unsigned long val;
9312 	int old_order;
9313 	int order;
9314 	int pages;
9315 	int ret;
9316 
9317 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
9318 	if (ret)
9319 		return ret;
9320 
9321 	val *= 1024; /* value passed in is in KB */
9322 
9323 	pages = DIV_ROUND_UP(val, PAGE_SIZE);
9324 	order = fls(pages - 1);
9325 
9326 	/* limit between 1 and 128 system pages */
9327 	if (order < 0 || order > 7)
9328 		return -EINVAL;
9329 
9330 	/* Do not allow tracing while changing the order of the ring buffer */
9331 	tracing_stop_tr(tr);
9332 
9333 	old_order = ring_buffer_subbuf_order_get(tr->array_buffer.buffer);
9334 	if (old_order == order)
9335 		goto out;
9336 
9337 	ret = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, order);
9338 	if (ret)
9339 		goto out;
9340 
9341 #ifdef CONFIG_TRACER_MAX_TRACE
9342 
9343 	if (!tr->allocated_snapshot)
9344 		goto out_max;
9345 
9346 	ret = ring_buffer_subbuf_order_set(tr->max_buffer.buffer, order);
9347 	if (ret) {
9348 		/* Put back the old order */
9349 		cnt = ring_buffer_subbuf_order_set(tr->array_buffer.buffer, old_order);
9350 		if (WARN_ON_ONCE(cnt)) {
9351 			/*
9352 			 * AARGH! We are left with different orders!
9353 			 * The max buffer is our "snapshot" buffer.
9354 			 * When a tracer needs a snapshot (one of the
9355 			 * latency tracers), it swaps the max buffer
9356 			 * with the saved snap shot. We succeeded to
9357 			 * update the order of the main buffer, but failed to
9358 			 * update the order of the max buffer. But when we tried
9359 			 * to reset the main buffer to the original size, we
9360 			 * failed there too. This is very unlikely to
9361 			 * happen, but if it does, warn and kill all
9362 			 * tracing.
9363 			 */
9364 			tracing_disabled = 1;
9365 		}
9366 		goto out;
9367 	}
9368  out_max:
9369 #endif
9370 	(*ppos)++;
9371  out:
9372 	if (ret)
9373 		cnt = ret;
9374 	tracing_start_tr(tr);
9375 	return cnt;
9376 }
9377 
9378 static const struct file_operations buffer_subbuf_size_fops = {
9379 	.open		= tracing_open_generic_tr,
9380 	.read		= buffer_subbuf_size_read,
9381 	.write		= buffer_subbuf_size_write,
9382 	.release	= tracing_release_generic_tr,
9383 	.llseek		= default_llseek,
9384 };
9385 
9386 static struct dentry *trace_instance_dir;
9387 
9388 static void
9389 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer);
9390 
9391 static int
9392 allocate_trace_buffer(struct trace_array *tr, struct array_buffer *buf, int size)
9393 {
9394 	enum ring_buffer_flags rb_flags;
9395 
9396 	rb_flags = tr->trace_flags & TRACE_ITER_OVERWRITE ? RB_FL_OVERWRITE : 0;
9397 
9398 	buf->tr = tr;
9399 
9400 	if (tr->range_addr_start && tr->range_addr_size) {
9401 		buf->buffer = ring_buffer_alloc_range(size, rb_flags, 0,
9402 						      tr->range_addr_start,
9403 						      tr->range_addr_size);
9404 
9405 		ring_buffer_last_boot_delta(buf->buffer,
9406 					    &tr->text_delta, &tr->data_delta);
9407 		/*
9408 		 * This is basically the same as a mapped buffer,
9409 		 * with the same restrictions.
9410 		 */
9411 		tr->mapped++;
9412 	} else {
9413 		buf->buffer = ring_buffer_alloc(size, rb_flags);
9414 	}
9415 	if (!buf->buffer)
9416 		return -ENOMEM;
9417 
9418 	buf->data = alloc_percpu(struct trace_array_cpu);
9419 	if (!buf->data) {
9420 		ring_buffer_free(buf->buffer);
9421 		buf->buffer = NULL;
9422 		return -ENOMEM;
9423 	}
9424 
9425 	/* Allocate the first page for all buffers */
9426 	set_buffer_entries(&tr->array_buffer,
9427 			   ring_buffer_size(tr->array_buffer.buffer, 0));
9428 
9429 	return 0;
9430 }
9431 
9432 static void free_trace_buffer(struct array_buffer *buf)
9433 {
9434 	if (buf->buffer) {
9435 		ring_buffer_free(buf->buffer);
9436 		buf->buffer = NULL;
9437 		free_percpu(buf->data);
9438 		buf->data = NULL;
9439 	}
9440 }
9441 
9442 static int allocate_trace_buffers(struct trace_array *tr, int size)
9443 {
9444 	int ret;
9445 
9446 	ret = allocate_trace_buffer(tr, &tr->array_buffer, size);
9447 	if (ret)
9448 		return ret;
9449 
9450 #ifdef CONFIG_TRACER_MAX_TRACE
9451 	/* Fix mapped buffer trace arrays do not have snapshot buffers */
9452 	if (tr->range_addr_start)
9453 		return 0;
9454 
9455 	ret = allocate_trace_buffer(tr, &tr->max_buffer,
9456 				    allocate_snapshot ? size : 1);
9457 	if (MEM_FAIL(ret, "Failed to allocate trace buffer\n")) {
9458 		free_trace_buffer(&tr->array_buffer);
9459 		return -ENOMEM;
9460 	}
9461 	tr->allocated_snapshot = allocate_snapshot;
9462 
9463 	allocate_snapshot = false;
9464 #endif
9465 
9466 	return 0;
9467 }
9468 
9469 static void free_trace_buffers(struct trace_array *tr)
9470 {
9471 	if (!tr)
9472 		return;
9473 
9474 	free_trace_buffer(&tr->array_buffer);
9475 
9476 #ifdef CONFIG_TRACER_MAX_TRACE
9477 	free_trace_buffer(&tr->max_buffer);
9478 #endif
9479 }
9480 
9481 static void init_trace_flags_index(struct trace_array *tr)
9482 {
9483 	int i;
9484 
9485 	/* Used by the trace options files */
9486 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++)
9487 		tr->trace_flags_index[i] = i;
9488 }
9489 
9490 static void __update_tracer_options(struct trace_array *tr)
9491 {
9492 	struct tracer *t;
9493 
9494 	for (t = trace_types; t; t = t->next)
9495 		add_tracer_options(tr, t);
9496 }
9497 
9498 static void update_tracer_options(struct trace_array *tr)
9499 {
9500 	mutex_lock(&trace_types_lock);
9501 	tracer_options_updated = true;
9502 	__update_tracer_options(tr);
9503 	mutex_unlock(&trace_types_lock);
9504 }
9505 
9506 /* Must have trace_types_lock held */
9507 struct trace_array *trace_array_find(const char *instance)
9508 {
9509 	struct trace_array *tr, *found = NULL;
9510 
9511 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9512 		if (tr->name && strcmp(tr->name, instance) == 0) {
9513 			found = tr;
9514 			break;
9515 		}
9516 	}
9517 
9518 	return found;
9519 }
9520 
9521 struct trace_array *trace_array_find_get(const char *instance)
9522 {
9523 	struct trace_array *tr;
9524 
9525 	mutex_lock(&trace_types_lock);
9526 	tr = trace_array_find(instance);
9527 	if (tr)
9528 		tr->ref++;
9529 	mutex_unlock(&trace_types_lock);
9530 
9531 	return tr;
9532 }
9533 
9534 static int trace_array_create_dir(struct trace_array *tr)
9535 {
9536 	int ret;
9537 
9538 	tr->dir = tracefs_create_dir(tr->name, trace_instance_dir);
9539 	if (!tr->dir)
9540 		return -EINVAL;
9541 
9542 	ret = event_trace_add_tracer(tr->dir, tr);
9543 	if (ret) {
9544 		tracefs_remove(tr->dir);
9545 		return ret;
9546 	}
9547 
9548 	init_tracer_tracefs(tr, tr->dir);
9549 	__update_tracer_options(tr);
9550 
9551 	return ret;
9552 }
9553 
9554 static struct trace_array *
9555 trace_array_create_systems(const char *name, const char *systems,
9556 			   unsigned long range_addr_start,
9557 			   unsigned long range_addr_size)
9558 {
9559 	struct trace_array *tr;
9560 	int ret;
9561 
9562 	ret = -ENOMEM;
9563 	tr = kzalloc(sizeof(*tr), GFP_KERNEL);
9564 	if (!tr)
9565 		return ERR_PTR(ret);
9566 
9567 	tr->name = kstrdup(name, GFP_KERNEL);
9568 	if (!tr->name)
9569 		goto out_free_tr;
9570 
9571 	if (!alloc_cpumask_var(&tr->tracing_cpumask, GFP_KERNEL))
9572 		goto out_free_tr;
9573 
9574 	if (!zalloc_cpumask_var(&tr->pipe_cpumask, GFP_KERNEL))
9575 		goto out_free_tr;
9576 
9577 	if (systems) {
9578 		tr->system_names = kstrdup_const(systems, GFP_KERNEL);
9579 		if (!tr->system_names)
9580 			goto out_free_tr;
9581 	}
9582 
9583 	/* Only for boot up memory mapped ring buffers */
9584 	tr->range_addr_start = range_addr_start;
9585 	tr->range_addr_size = range_addr_size;
9586 
9587 	tr->trace_flags = global_trace.trace_flags & ~ZEROED_TRACE_FLAGS;
9588 
9589 	cpumask_copy(tr->tracing_cpumask, cpu_all_mask);
9590 
9591 	raw_spin_lock_init(&tr->start_lock);
9592 
9593 	tr->max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
9594 #ifdef CONFIG_TRACER_MAX_TRACE
9595 	spin_lock_init(&tr->snapshot_trigger_lock);
9596 #endif
9597 	tr->current_trace = &nop_trace;
9598 
9599 	INIT_LIST_HEAD(&tr->systems);
9600 	INIT_LIST_HEAD(&tr->events);
9601 	INIT_LIST_HEAD(&tr->hist_vars);
9602 	INIT_LIST_HEAD(&tr->err_log);
9603 
9604 	if (allocate_trace_buffers(tr, trace_buf_size) < 0)
9605 		goto out_free_tr;
9606 
9607 	/* The ring buffer is defaultly expanded */
9608 	trace_set_ring_buffer_expanded(tr);
9609 
9610 	if (ftrace_allocate_ftrace_ops(tr) < 0)
9611 		goto out_free_tr;
9612 
9613 	ftrace_init_trace_array(tr);
9614 
9615 	init_trace_flags_index(tr);
9616 
9617 	if (trace_instance_dir) {
9618 		ret = trace_array_create_dir(tr);
9619 		if (ret)
9620 			goto out_free_tr;
9621 	} else
9622 		__trace_early_add_events(tr);
9623 
9624 	list_add(&tr->list, &ftrace_trace_arrays);
9625 
9626 	tr->ref++;
9627 
9628 	return tr;
9629 
9630  out_free_tr:
9631 	ftrace_free_ftrace_ops(tr);
9632 	free_trace_buffers(tr);
9633 	free_cpumask_var(tr->pipe_cpumask);
9634 	free_cpumask_var(tr->tracing_cpumask);
9635 	kfree_const(tr->system_names);
9636 	kfree(tr->name);
9637 	kfree(tr);
9638 
9639 	return ERR_PTR(ret);
9640 }
9641 
9642 static struct trace_array *trace_array_create(const char *name)
9643 {
9644 	return trace_array_create_systems(name, NULL, 0, 0);
9645 }
9646 
9647 static int instance_mkdir(const char *name)
9648 {
9649 	struct trace_array *tr;
9650 	int ret;
9651 
9652 	mutex_lock(&event_mutex);
9653 	mutex_lock(&trace_types_lock);
9654 
9655 	ret = -EEXIST;
9656 	if (trace_array_find(name))
9657 		goto out_unlock;
9658 
9659 	tr = trace_array_create(name);
9660 
9661 	ret = PTR_ERR_OR_ZERO(tr);
9662 
9663 out_unlock:
9664 	mutex_unlock(&trace_types_lock);
9665 	mutex_unlock(&event_mutex);
9666 	return ret;
9667 }
9668 
9669 static u64 map_pages(u64 start, u64 size)
9670 {
9671 	struct page **pages;
9672 	phys_addr_t page_start;
9673 	unsigned int page_count;
9674 	unsigned int i;
9675 	void *vaddr;
9676 
9677 	page_count = DIV_ROUND_UP(size, PAGE_SIZE);
9678 
9679 	page_start = start;
9680 	pages = kmalloc_array(page_count, sizeof(struct page *), GFP_KERNEL);
9681 	if (!pages)
9682 		return 0;
9683 
9684 	for (i = 0; i < page_count; i++) {
9685 		phys_addr_t addr = page_start + i * PAGE_SIZE;
9686 		pages[i] = pfn_to_page(addr >> PAGE_SHIFT);
9687 	}
9688 	vaddr = vmap(pages, page_count, VM_MAP, PAGE_KERNEL);
9689 	kfree(pages);
9690 
9691 	return (u64)(unsigned long)vaddr;
9692 }
9693 
9694 /**
9695  * trace_array_get_by_name - Create/Lookup a trace array, given its name.
9696  * @name: The name of the trace array to be looked up/created.
9697  * @systems: A list of systems to create event directories for (NULL for all)
9698  *
9699  * Returns pointer to trace array with given name.
9700  * NULL, if it cannot be created.
9701  *
9702  * NOTE: This function increments the reference counter associated with the
9703  * trace array returned. This makes sure it cannot be freed while in use.
9704  * Use trace_array_put() once the trace array is no longer needed.
9705  * If the trace_array is to be freed, trace_array_destroy() needs to
9706  * be called after the trace_array_put(), or simply let user space delete
9707  * it from the tracefs instances directory. But until the
9708  * trace_array_put() is called, user space can not delete it.
9709  *
9710  */
9711 struct trace_array *trace_array_get_by_name(const char *name, const char *systems)
9712 {
9713 	struct trace_array *tr;
9714 
9715 	mutex_lock(&event_mutex);
9716 	mutex_lock(&trace_types_lock);
9717 
9718 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9719 		if (tr->name && strcmp(tr->name, name) == 0)
9720 			goto out_unlock;
9721 	}
9722 
9723 	tr = trace_array_create_systems(name, systems, 0, 0);
9724 
9725 	if (IS_ERR(tr))
9726 		tr = NULL;
9727 out_unlock:
9728 	if (tr)
9729 		tr->ref++;
9730 
9731 	mutex_unlock(&trace_types_lock);
9732 	mutex_unlock(&event_mutex);
9733 	return tr;
9734 }
9735 EXPORT_SYMBOL_GPL(trace_array_get_by_name);
9736 
9737 static int __remove_instance(struct trace_array *tr)
9738 {
9739 	int i;
9740 
9741 	/* Reference counter for a newly created trace array = 1. */
9742 	if (tr->ref > 1 || (tr->current_trace && tr->trace_ref))
9743 		return -EBUSY;
9744 
9745 	list_del(&tr->list);
9746 
9747 	/* Disable all the flags that were enabled coming in */
9748 	for (i = 0; i < TRACE_FLAGS_MAX_SIZE; i++) {
9749 		if ((1 << i) & ZEROED_TRACE_FLAGS)
9750 			set_tracer_flag(tr, 1 << i, 0);
9751 	}
9752 
9753 	if (printk_trace == tr)
9754 		update_printk_trace(&global_trace);
9755 
9756 	tracing_set_nop(tr);
9757 	clear_ftrace_function_probes(tr);
9758 	event_trace_del_tracer(tr);
9759 	ftrace_clear_pids(tr);
9760 	ftrace_destroy_function_files(tr);
9761 	tracefs_remove(tr->dir);
9762 	free_percpu(tr->last_func_repeats);
9763 	free_trace_buffers(tr);
9764 	clear_tracing_err_log(tr);
9765 
9766 	for (i = 0; i < tr->nr_topts; i++) {
9767 		kfree(tr->topts[i].topts);
9768 	}
9769 	kfree(tr->topts);
9770 
9771 	free_cpumask_var(tr->pipe_cpumask);
9772 	free_cpumask_var(tr->tracing_cpumask);
9773 	kfree_const(tr->system_names);
9774 	kfree(tr->name);
9775 	kfree(tr);
9776 
9777 	return 0;
9778 }
9779 
9780 int trace_array_destroy(struct trace_array *this_tr)
9781 {
9782 	struct trace_array *tr;
9783 	int ret;
9784 
9785 	if (!this_tr)
9786 		return -EINVAL;
9787 
9788 	mutex_lock(&event_mutex);
9789 	mutex_lock(&trace_types_lock);
9790 
9791 	ret = -ENODEV;
9792 
9793 	/* Making sure trace array exists before destroying it. */
9794 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9795 		if (tr == this_tr) {
9796 			ret = __remove_instance(tr);
9797 			break;
9798 		}
9799 	}
9800 
9801 	mutex_unlock(&trace_types_lock);
9802 	mutex_unlock(&event_mutex);
9803 
9804 	return ret;
9805 }
9806 EXPORT_SYMBOL_GPL(trace_array_destroy);
9807 
9808 static int instance_rmdir(const char *name)
9809 {
9810 	struct trace_array *tr;
9811 	int ret;
9812 
9813 	mutex_lock(&event_mutex);
9814 	mutex_lock(&trace_types_lock);
9815 
9816 	ret = -ENODEV;
9817 	tr = trace_array_find(name);
9818 	if (tr)
9819 		ret = __remove_instance(tr);
9820 
9821 	mutex_unlock(&trace_types_lock);
9822 	mutex_unlock(&event_mutex);
9823 
9824 	return ret;
9825 }
9826 
9827 static __init void create_trace_instances(struct dentry *d_tracer)
9828 {
9829 	struct trace_array *tr;
9830 
9831 	trace_instance_dir = tracefs_create_instance_dir("instances", d_tracer,
9832 							 instance_mkdir,
9833 							 instance_rmdir);
9834 	if (MEM_FAIL(!trace_instance_dir, "Failed to create instances directory\n"))
9835 		return;
9836 
9837 	mutex_lock(&event_mutex);
9838 	mutex_lock(&trace_types_lock);
9839 
9840 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
9841 		if (!tr->name)
9842 			continue;
9843 		if (MEM_FAIL(trace_array_create_dir(tr) < 0,
9844 			     "Failed to create instance directory\n"))
9845 			break;
9846 	}
9847 
9848 	mutex_unlock(&trace_types_lock);
9849 	mutex_unlock(&event_mutex);
9850 }
9851 
9852 static void
9853 init_tracer_tracefs(struct trace_array *tr, struct dentry *d_tracer)
9854 {
9855 	int cpu;
9856 
9857 	trace_create_file("available_tracers", TRACE_MODE_READ, d_tracer,
9858 			tr, &show_traces_fops);
9859 
9860 	trace_create_file("current_tracer", TRACE_MODE_WRITE, d_tracer,
9861 			tr, &set_tracer_fops);
9862 
9863 	trace_create_file("tracing_cpumask", TRACE_MODE_WRITE, d_tracer,
9864 			  tr, &tracing_cpumask_fops);
9865 
9866 	trace_create_file("trace_options", TRACE_MODE_WRITE, d_tracer,
9867 			  tr, &tracing_iter_fops);
9868 
9869 	trace_create_file("trace", TRACE_MODE_WRITE, d_tracer,
9870 			  tr, &tracing_fops);
9871 
9872 	trace_create_file("trace_pipe", TRACE_MODE_READ, d_tracer,
9873 			  tr, &tracing_pipe_fops);
9874 
9875 	trace_create_file("buffer_size_kb", TRACE_MODE_WRITE, d_tracer,
9876 			  tr, &tracing_entries_fops);
9877 
9878 	trace_create_file("buffer_total_size_kb", TRACE_MODE_READ, d_tracer,
9879 			  tr, &tracing_total_entries_fops);
9880 
9881 	trace_create_file("free_buffer", 0200, d_tracer,
9882 			  tr, &tracing_free_buffer_fops);
9883 
9884 	trace_create_file("trace_marker", 0220, d_tracer,
9885 			  tr, &tracing_mark_fops);
9886 
9887 	tr->trace_marker_file = __find_event_file(tr, "ftrace", "print");
9888 
9889 	trace_create_file("trace_marker_raw", 0220, d_tracer,
9890 			  tr, &tracing_mark_raw_fops);
9891 
9892 	trace_create_file("trace_clock", TRACE_MODE_WRITE, d_tracer, tr,
9893 			  &trace_clock_fops);
9894 
9895 	trace_create_file("tracing_on", TRACE_MODE_WRITE, d_tracer,
9896 			  tr, &rb_simple_fops);
9897 
9898 	trace_create_file("timestamp_mode", TRACE_MODE_READ, d_tracer, tr,
9899 			  &trace_time_stamp_mode_fops);
9900 
9901 	tr->buffer_percent = 50;
9902 
9903 	trace_create_file("buffer_percent", TRACE_MODE_WRITE, d_tracer,
9904 			tr, &buffer_percent_fops);
9905 
9906 	trace_create_file("buffer_subbuf_size_kb", TRACE_MODE_WRITE, d_tracer,
9907 			  tr, &buffer_subbuf_size_fops);
9908 
9909 	create_trace_options_dir(tr);
9910 
9911 #ifdef CONFIG_TRACER_MAX_TRACE
9912 	trace_create_maxlat_file(tr, d_tracer);
9913 #endif
9914 
9915 	if (ftrace_create_function_files(tr, d_tracer))
9916 		MEM_FAIL(1, "Could not allocate function filter files");
9917 
9918 	if (tr->range_addr_start) {
9919 		trace_create_file("last_boot_info", TRACE_MODE_READ, d_tracer,
9920 				  tr, &last_boot_fops);
9921 #ifdef CONFIG_TRACER_SNAPSHOT
9922 	} else {
9923 		trace_create_file("snapshot", TRACE_MODE_WRITE, d_tracer,
9924 				  tr, &snapshot_fops);
9925 #endif
9926 	}
9927 
9928 	trace_create_file("error_log", TRACE_MODE_WRITE, d_tracer,
9929 			  tr, &tracing_err_log_fops);
9930 
9931 	for_each_tracing_cpu(cpu)
9932 		tracing_init_tracefs_percpu(tr, cpu);
9933 
9934 	ftrace_init_tracefs(tr, d_tracer);
9935 }
9936 
9937 static struct vfsmount *trace_automount(struct dentry *mntpt, void *ingore)
9938 {
9939 	struct vfsmount *mnt;
9940 	struct file_system_type *type;
9941 
9942 	/*
9943 	 * To maintain backward compatibility for tools that mount
9944 	 * debugfs to get to the tracing facility, tracefs is automatically
9945 	 * mounted to the debugfs/tracing directory.
9946 	 */
9947 	type = get_fs_type("tracefs");
9948 	if (!type)
9949 		return NULL;
9950 	mnt = vfs_submount(mntpt, type, "tracefs", NULL);
9951 	put_filesystem(type);
9952 	if (IS_ERR(mnt))
9953 		return NULL;
9954 	mntget(mnt);
9955 
9956 	return mnt;
9957 }
9958 
9959 /**
9960  * tracing_init_dentry - initialize top level trace array
9961  *
9962  * This is called when creating files or directories in the tracing
9963  * directory. It is called via fs_initcall() by any of the boot up code
9964  * and expects to return the dentry of the top level tracing directory.
9965  */
9966 int tracing_init_dentry(void)
9967 {
9968 	struct trace_array *tr = &global_trace;
9969 
9970 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
9971 		pr_warn("Tracing disabled due to lockdown\n");
9972 		return -EPERM;
9973 	}
9974 
9975 	/* The top level trace array uses  NULL as parent */
9976 	if (tr->dir)
9977 		return 0;
9978 
9979 	if (WARN_ON(!tracefs_initialized()))
9980 		return -ENODEV;
9981 
9982 	/*
9983 	 * As there may still be users that expect the tracing
9984 	 * files to exist in debugfs/tracing, we must automount
9985 	 * the tracefs file system there, so older tools still
9986 	 * work with the newer kernel.
9987 	 */
9988 	tr->dir = debugfs_create_automount("tracing", NULL,
9989 					   trace_automount, NULL);
9990 
9991 	return 0;
9992 }
9993 
9994 extern struct trace_eval_map *__start_ftrace_eval_maps[];
9995 extern struct trace_eval_map *__stop_ftrace_eval_maps[];
9996 
9997 static struct workqueue_struct *eval_map_wq __initdata;
9998 static struct work_struct eval_map_work __initdata;
9999 static struct work_struct tracerfs_init_work __initdata;
10000 
10001 static void __init eval_map_work_func(struct work_struct *work)
10002 {
10003 	int len;
10004 
10005 	len = __stop_ftrace_eval_maps - __start_ftrace_eval_maps;
10006 	trace_insert_eval_map(NULL, __start_ftrace_eval_maps, len);
10007 }
10008 
10009 static int __init trace_eval_init(void)
10010 {
10011 	INIT_WORK(&eval_map_work, eval_map_work_func);
10012 
10013 	eval_map_wq = alloc_workqueue("eval_map_wq", WQ_UNBOUND, 0);
10014 	if (!eval_map_wq) {
10015 		pr_err("Unable to allocate eval_map_wq\n");
10016 		/* Do work here */
10017 		eval_map_work_func(&eval_map_work);
10018 		return -ENOMEM;
10019 	}
10020 
10021 	queue_work(eval_map_wq, &eval_map_work);
10022 	return 0;
10023 }
10024 
10025 subsys_initcall(trace_eval_init);
10026 
10027 static int __init trace_eval_sync(void)
10028 {
10029 	/* Make sure the eval map updates are finished */
10030 	if (eval_map_wq)
10031 		destroy_workqueue(eval_map_wq);
10032 	return 0;
10033 }
10034 
10035 late_initcall_sync(trace_eval_sync);
10036 
10037 
10038 #ifdef CONFIG_MODULES
10039 static void trace_module_add_evals(struct module *mod)
10040 {
10041 	if (!mod->num_trace_evals)
10042 		return;
10043 
10044 	/*
10045 	 * Modules with bad taint do not have events created, do
10046 	 * not bother with enums either.
10047 	 */
10048 	if (trace_module_has_bad_taint(mod))
10049 		return;
10050 
10051 	trace_insert_eval_map(mod, mod->trace_evals, mod->num_trace_evals);
10052 }
10053 
10054 #ifdef CONFIG_TRACE_EVAL_MAP_FILE
10055 static void trace_module_remove_evals(struct module *mod)
10056 {
10057 	union trace_eval_map_item *map;
10058 	union trace_eval_map_item **last = &trace_eval_maps;
10059 
10060 	if (!mod->num_trace_evals)
10061 		return;
10062 
10063 	mutex_lock(&trace_eval_mutex);
10064 
10065 	map = trace_eval_maps;
10066 
10067 	while (map) {
10068 		if (map->head.mod == mod)
10069 			break;
10070 		map = trace_eval_jmp_to_tail(map);
10071 		last = &map->tail.next;
10072 		map = map->tail.next;
10073 	}
10074 	if (!map)
10075 		goto out;
10076 
10077 	*last = trace_eval_jmp_to_tail(map)->tail.next;
10078 	kfree(map);
10079  out:
10080 	mutex_unlock(&trace_eval_mutex);
10081 }
10082 #else
10083 static inline void trace_module_remove_evals(struct module *mod) { }
10084 #endif /* CONFIG_TRACE_EVAL_MAP_FILE */
10085 
10086 static int trace_module_notify(struct notifier_block *self,
10087 			       unsigned long val, void *data)
10088 {
10089 	struct module *mod = data;
10090 
10091 	switch (val) {
10092 	case MODULE_STATE_COMING:
10093 		trace_module_add_evals(mod);
10094 		break;
10095 	case MODULE_STATE_GOING:
10096 		trace_module_remove_evals(mod);
10097 		break;
10098 	}
10099 
10100 	return NOTIFY_OK;
10101 }
10102 
10103 static struct notifier_block trace_module_nb = {
10104 	.notifier_call = trace_module_notify,
10105 	.priority = 0,
10106 };
10107 #endif /* CONFIG_MODULES */
10108 
10109 static __init void tracer_init_tracefs_work_func(struct work_struct *work)
10110 {
10111 
10112 	event_trace_init();
10113 
10114 	init_tracer_tracefs(&global_trace, NULL);
10115 	ftrace_init_tracefs_toplevel(&global_trace, NULL);
10116 
10117 	trace_create_file("tracing_thresh", TRACE_MODE_WRITE, NULL,
10118 			&global_trace, &tracing_thresh_fops);
10119 
10120 	trace_create_file("README", TRACE_MODE_READ, NULL,
10121 			NULL, &tracing_readme_fops);
10122 
10123 	trace_create_file("saved_cmdlines", TRACE_MODE_READ, NULL,
10124 			NULL, &tracing_saved_cmdlines_fops);
10125 
10126 	trace_create_file("saved_cmdlines_size", TRACE_MODE_WRITE, NULL,
10127 			  NULL, &tracing_saved_cmdlines_size_fops);
10128 
10129 	trace_create_file("saved_tgids", TRACE_MODE_READ, NULL,
10130 			NULL, &tracing_saved_tgids_fops);
10131 
10132 	trace_create_eval_file(NULL);
10133 
10134 #ifdef CONFIG_MODULES
10135 	register_module_notifier(&trace_module_nb);
10136 #endif
10137 
10138 #ifdef CONFIG_DYNAMIC_FTRACE
10139 	trace_create_file("dyn_ftrace_total_info", TRACE_MODE_READ, NULL,
10140 			NULL, &tracing_dyn_info_fops);
10141 #endif
10142 
10143 	create_trace_instances(NULL);
10144 
10145 	update_tracer_options(&global_trace);
10146 }
10147 
10148 static __init int tracer_init_tracefs(void)
10149 {
10150 	int ret;
10151 
10152 	trace_access_lock_init();
10153 
10154 	ret = tracing_init_dentry();
10155 	if (ret)
10156 		return 0;
10157 
10158 	if (eval_map_wq) {
10159 		INIT_WORK(&tracerfs_init_work, tracer_init_tracefs_work_func);
10160 		queue_work(eval_map_wq, &tracerfs_init_work);
10161 	} else {
10162 		tracer_init_tracefs_work_func(NULL);
10163 	}
10164 
10165 	rv_init_interface();
10166 
10167 	return 0;
10168 }
10169 
10170 fs_initcall(tracer_init_tracefs);
10171 
10172 static int trace_die_panic_handler(struct notifier_block *self,
10173 				unsigned long ev, void *unused);
10174 
10175 static struct notifier_block trace_panic_notifier = {
10176 	.notifier_call = trace_die_panic_handler,
10177 	.priority = INT_MAX - 1,
10178 };
10179 
10180 static struct notifier_block trace_die_notifier = {
10181 	.notifier_call = trace_die_panic_handler,
10182 	.priority = INT_MAX - 1,
10183 };
10184 
10185 /*
10186  * The idea is to execute the following die/panic callback early, in order
10187  * to avoid showing irrelevant information in the trace (like other panic
10188  * notifier functions); we are the 2nd to run, after hung_task/rcu_stall
10189  * warnings get disabled (to prevent potential log flooding).
10190  */
10191 static int trace_die_panic_handler(struct notifier_block *self,
10192 				unsigned long ev, void *unused)
10193 {
10194 	if (!ftrace_dump_on_oops_enabled())
10195 		return NOTIFY_DONE;
10196 
10197 	/* The die notifier requires DIE_OOPS to trigger */
10198 	if (self == &trace_die_notifier && ev != DIE_OOPS)
10199 		return NOTIFY_DONE;
10200 
10201 	ftrace_dump(DUMP_PARAM);
10202 
10203 	return NOTIFY_DONE;
10204 }
10205 
10206 /*
10207  * printk is set to max of 1024, we really don't need it that big.
10208  * Nothing should be printing 1000 characters anyway.
10209  */
10210 #define TRACE_MAX_PRINT		1000
10211 
10212 /*
10213  * Define here KERN_TRACE so that we have one place to modify
10214  * it if we decide to change what log level the ftrace dump
10215  * should be at.
10216  */
10217 #define KERN_TRACE		KERN_EMERG
10218 
10219 void
10220 trace_printk_seq(struct trace_seq *s)
10221 {
10222 	/* Probably should print a warning here. */
10223 	if (s->seq.len >= TRACE_MAX_PRINT)
10224 		s->seq.len = TRACE_MAX_PRINT;
10225 
10226 	/*
10227 	 * More paranoid code. Although the buffer size is set to
10228 	 * PAGE_SIZE, and TRACE_MAX_PRINT is 1000, this is just
10229 	 * an extra layer of protection.
10230 	 */
10231 	if (WARN_ON_ONCE(s->seq.len >= s->seq.size))
10232 		s->seq.len = s->seq.size - 1;
10233 
10234 	/* should be zero ended, but we are paranoid. */
10235 	s->buffer[s->seq.len] = 0;
10236 
10237 	printk(KERN_TRACE "%s", s->buffer);
10238 
10239 	trace_seq_init(s);
10240 }
10241 
10242 static void trace_init_iter(struct trace_iterator *iter, struct trace_array *tr)
10243 {
10244 	iter->tr = tr;
10245 	iter->trace = iter->tr->current_trace;
10246 	iter->cpu_file = RING_BUFFER_ALL_CPUS;
10247 	iter->array_buffer = &tr->array_buffer;
10248 
10249 	if (iter->trace && iter->trace->open)
10250 		iter->trace->open(iter);
10251 
10252 	/* Annotate start of buffers if we had overruns */
10253 	if (ring_buffer_overruns(iter->array_buffer->buffer))
10254 		iter->iter_flags |= TRACE_FILE_ANNOTATE;
10255 
10256 	/* Output in nanoseconds only if we are using a clock in nanoseconds. */
10257 	if (trace_clocks[iter->tr->clock_id].in_ns)
10258 		iter->iter_flags |= TRACE_FILE_TIME_IN_NS;
10259 
10260 	/* Can not use kmalloc for iter.temp and iter.fmt */
10261 	iter->temp = static_temp_buf;
10262 	iter->temp_size = STATIC_TEMP_BUF_SIZE;
10263 	iter->fmt = static_fmt_buf;
10264 	iter->fmt_size = STATIC_FMT_BUF_SIZE;
10265 }
10266 
10267 void trace_init_global_iter(struct trace_iterator *iter)
10268 {
10269 	trace_init_iter(iter, &global_trace);
10270 }
10271 
10272 static void ftrace_dump_one(struct trace_array *tr, enum ftrace_dump_mode dump_mode)
10273 {
10274 	/* use static because iter can be a bit big for the stack */
10275 	static struct trace_iterator iter;
10276 	unsigned int old_userobj;
10277 	unsigned long flags;
10278 	int cnt = 0, cpu;
10279 
10280 	/*
10281 	 * Always turn off tracing when we dump.
10282 	 * We don't need to show trace output of what happens
10283 	 * between multiple crashes.
10284 	 *
10285 	 * If the user does a sysrq-z, then they can re-enable
10286 	 * tracing with echo 1 > tracing_on.
10287 	 */
10288 	tracer_tracing_off(tr);
10289 
10290 	local_irq_save(flags);
10291 
10292 	/* Simulate the iterator */
10293 	trace_init_iter(&iter, tr);
10294 
10295 	for_each_tracing_cpu(cpu) {
10296 		atomic_inc(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10297 	}
10298 
10299 	old_userobj = tr->trace_flags & TRACE_ITER_SYM_USEROBJ;
10300 
10301 	/* don't look at user memory in panic mode */
10302 	tr->trace_flags &= ~TRACE_ITER_SYM_USEROBJ;
10303 
10304 	if (dump_mode == DUMP_ORIG)
10305 		iter.cpu_file = raw_smp_processor_id();
10306 	else
10307 		iter.cpu_file = RING_BUFFER_ALL_CPUS;
10308 
10309 	if (tr == &global_trace)
10310 		printk(KERN_TRACE "Dumping ftrace buffer:\n");
10311 	else
10312 		printk(KERN_TRACE "Dumping ftrace instance %s buffer:\n", tr->name);
10313 
10314 	/* Did function tracer already get disabled? */
10315 	if (ftrace_is_dead()) {
10316 		printk("# WARNING: FUNCTION TRACING IS CORRUPTED\n");
10317 		printk("#          MAY BE MISSING FUNCTION EVENTS\n");
10318 	}
10319 
10320 	/*
10321 	 * We need to stop all tracing on all CPUS to read
10322 	 * the next buffer. This is a bit expensive, but is
10323 	 * not done often. We fill all what we can read,
10324 	 * and then release the locks again.
10325 	 */
10326 
10327 	while (!trace_empty(&iter)) {
10328 
10329 		if (!cnt)
10330 			printk(KERN_TRACE "---------------------------------\n");
10331 
10332 		cnt++;
10333 
10334 		trace_iterator_reset(&iter);
10335 		iter.iter_flags |= TRACE_FILE_LAT_FMT;
10336 
10337 		if (trace_find_next_entry_inc(&iter) != NULL) {
10338 			int ret;
10339 
10340 			ret = print_trace_line(&iter);
10341 			if (ret != TRACE_TYPE_NO_CONSUME)
10342 				trace_consume(&iter);
10343 		}
10344 		touch_nmi_watchdog();
10345 
10346 		trace_printk_seq(&iter.seq);
10347 	}
10348 
10349 	if (!cnt)
10350 		printk(KERN_TRACE "   (ftrace buffer empty)\n");
10351 	else
10352 		printk(KERN_TRACE "---------------------------------\n");
10353 
10354 	tr->trace_flags |= old_userobj;
10355 
10356 	for_each_tracing_cpu(cpu) {
10357 		atomic_dec(&per_cpu_ptr(iter.array_buffer->data, cpu)->disabled);
10358 	}
10359 	local_irq_restore(flags);
10360 }
10361 
10362 static void ftrace_dump_by_param(void)
10363 {
10364 	bool first_param = true;
10365 	char dump_param[MAX_TRACER_SIZE];
10366 	char *buf, *token, *inst_name;
10367 	struct trace_array *tr;
10368 
10369 	strscpy(dump_param, ftrace_dump_on_oops, MAX_TRACER_SIZE);
10370 	buf = dump_param;
10371 
10372 	while ((token = strsep(&buf, ",")) != NULL) {
10373 		if (first_param) {
10374 			first_param = false;
10375 			if (!strcmp("0", token))
10376 				continue;
10377 			else if (!strcmp("1", token)) {
10378 				ftrace_dump_one(&global_trace, DUMP_ALL);
10379 				continue;
10380 			}
10381 			else if (!strcmp("2", token) ||
10382 			  !strcmp("orig_cpu", token)) {
10383 				ftrace_dump_one(&global_trace, DUMP_ORIG);
10384 				continue;
10385 			}
10386 		}
10387 
10388 		inst_name = strsep(&token, "=");
10389 		tr = trace_array_find(inst_name);
10390 		if (!tr) {
10391 			printk(KERN_TRACE "Instance %s not found\n", inst_name);
10392 			continue;
10393 		}
10394 
10395 		if (token && (!strcmp("2", token) ||
10396 			  !strcmp("orig_cpu", token)))
10397 			ftrace_dump_one(tr, DUMP_ORIG);
10398 		else
10399 			ftrace_dump_one(tr, DUMP_ALL);
10400 	}
10401 }
10402 
10403 void ftrace_dump(enum ftrace_dump_mode oops_dump_mode)
10404 {
10405 	static atomic_t dump_running;
10406 
10407 	/* Only allow one dump user at a time. */
10408 	if (atomic_inc_return(&dump_running) != 1) {
10409 		atomic_dec(&dump_running);
10410 		return;
10411 	}
10412 
10413 	switch (oops_dump_mode) {
10414 	case DUMP_ALL:
10415 		ftrace_dump_one(&global_trace, DUMP_ALL);
10416 		break;
10417 	case DUMP_ORIG:
10418 		ftrace_dump_one(&global_trace, DUMP_ORIG);
10419 		break;
10420 	case DUMP_PARAM:
10421 		ftrace_dump_by_param();
10422 		break;
10423 	case DUMP_NONE:
10424 		break;
10425 	default:
10426 		printk(KERN_TRACE "Bad dumping mode, switching to all CPUs dump\n");
10427 		ftrace_dump_one(&global_trace, DUMP_ALL);
10428 	}
10429 
10430 	atomic_dec(&dump_running);
10431 }
10432 EXPORT_SYMBOL_GPL(ftrace_dump);
10433 
10434 #define WRITE_BUFSIZE  4096
10435 
10436 ssize_t trace_parse_run_command(struct file *file, const char __user *buffer,
10437 				size_t count, loff_t *ppos,
10438 				int (*createfn)(const char *))
10439 {
10440 	char *kbuf, *buf, *tmp;
10441 	int ret = 0;
10442 	size_t done = 0;
10443 	size_t size;
10444 
10445 	kbuf = kmalloc(WRITE_BUFSIZE, GFP_KERNEL);
10446 	if (!kbuf)
10447 		return -ENOMEM;
10448 
10449 	while (done < count) {
10450 		size = count - done;
10451 
10452 		if (size >= WRITE_BUFSIZE)
10453 			size = WRITE_BUFSIZE - 1;
10454 
10455 		if (copy_from_user(kbuf, buffer + done, size)) {
10456 			ret = -EFAULT;
10457 			goto out;
10458 		}
10459 		kbuf[size] = '\0';
10460 		buf = kbuf;
10461 		do {
10462 			tmp = strchr(buf, '\n');
10463 			if (tmp) {
10464 				*tmp = '\0';
10465 				size = tmp - buf + 1;
10466 			} else {
10467 				size = strlen(buf);
10468 				if (done + size < count) {
10469 					if (buf != kbuf)
10470 						break;
10471 					/* This can accept WRITE_BUFSIZE - 2 ('\n' + '\0') */
10472 					pr_warn("Line length is too long: Should be less than %d\n",
10473 						WRITE_BUFSIZE - 2);
10474 					ret = -EINVAL;
10475 					goto out;
10476 				}
10477 			}
10478 			done += size;
10479 
10480 			/* Remove comments */
10481 			tmp = strchr(buf, '#');
10482 
10483 			if (tmp)
10484 				*tmp = '\0';
10485 
10486 			ret = createfn(buf);
10487 			if (ret)
10488 				goto out;
10489 			buf += size;
10490 
10491 		} while (done < count);
10492 	}
10493 	ret = done;
10494 
10495 out:
10496 	kfree(kbuf);
10497 
10498 	return ret;
10499 }
10500 
10501 #ifdef CONFIG_TRACER_MAX_TRACE
10502 __init static bool tr_needs_alloc_snapshot(const char *name)
10503 {
10504 	char *test;
10505 	int len = strlen(name);
10506 	bool ret;
10507 
10508 	if (!boot_snapshot_index)
10509 		return false;
10510 
10511 	if (strncmp(name, boot_snapshot_info, len) == 0 &&
10512 	    boot_snapshot_info[len] == '\t')
10513 		return true;
10514 
10515 	test = kmalloc(strlen(name) + 3, GFP_KERNEL);
10516 	if (!test)
10517 		return false;
10518 
10519 	sprintf(test, "\t%s\t", name);
10520 	ret = strstr(boot_snapshot_info, test) == NULL;
10521 	kfree(test);
10522 	return ret;
10523 }
10524 
10525 __init static void do_allocate_snapshot(const char *name)
10526 {
10527 	if (!tr_needs_alloc_snapshot(name))
10528 		return;
10529 
10530 	/*
10531 	 * When allocate_snapshot is set, the next call to
10532 	 * allocate_trace_buffers() (called by trace_array_get_by_name())
10533 	 * will allocate the snapshot buffer. That will alse clear
10534 	 * this flag.
10535 	 */
10536 	allocate_snapshot = true;
10537 }
10538 #else
10539 static inline void do_allocate_snapshot(const char *name) { }
10540 #endif
10541 
10542 __init static void enable_instances(void)
10543 {
10544 	struct trace_array *tr;
10545 	char *curr_str;
10546 	char *name;
10547 	char *str;
10548 	char *tok;
10549 
10550 	/* A tab is always appended */
10551 	boot_instance_info[boot_instance_index - 1] = '\0';
10552 	str = boot_instance_info;
10553 
10554 	while ((curr_str = strsep(&str, "\t"))) {
10555 		phys_addr_t start = 0;
10556 		phys_addr_t size = 0;
10557 		unsigned long addr = 0;
10558 		bool traceprintk = false;
10559 		bool traceoff = false;
10560 		char *flag_delim;
10561 		char *addr_delim;
10562 
10563 		tok = strsep(&curr_str, ",");
10564 
10565 		flag_delim = strchr(tok, '^');
10566 		addr_delim = strchr(tok, '@');
10567 
10568 		if (addr_delim)
10569 			*addr_delim++ = '\0';
10570 
10571 		if (flag_delim)
10572 			*flag_delim++ = '\0';
10573 
10574 		name = tok;
10575 
10576 		if (flag_delim) {
10577 			char *flag;
10578 
10579 			while ((flag = strsep(&flag_delim, "^"))) {
10580 				if (strcmp(flag, "traceoff") == 0) {
10581 					traceoff = true;
10582 				} else if ((strcmp(flag, "printk") == 0) ||
10583 					   (strcmp(flag, "traceprintk") == 0) ||
10584 					   (strcmp(flag, "trace_printk") == 0)) {
10585 					traceprintk = true;
10586 				} else {
10587 					pr_info("Tracing: Invalid instance flag '%s' for %s\n",
10588 						flag, name);
10589 				}
10590 			}
10591 		}
10592 
10593 		tok = addr_delim;
10594 		if (tok && isdigit(*tok)) {
10595 			start = memparse(tok, &tok);
10596 			if (!start) {
10597 				pr_warn("Tracing: Invalid boot instance address for %s\n",
10598 					name);
10599 				continue;
10600 			}
10601 			if (*tok != ':') {
10602 				pr_warn("Tracing: No size specified for instance %s\n", name);
10603 				continue;
10604 			}
10605 			tok++;
10606 			size = memparse(tok, &tok);
10607 			if (!size) {
10608 				pr_warn("Tracing: Invalid boot instance size for %s\n",
10609 					name);
10610 				continue;
10611 			}
10612 		} else if (tok) {
10613 			if (!reserve_mem_find_by_name(tok, &start, &size)) {
10614 				start = 0;
10615 				pr_warn("Failed to map boot instance %s to %s\n", name, tok);
10616 				continue;
10617 			}
10618 		}
10619 
10620 		if (start) {
10621 			addr = map_pages(start, size);
10622 			if (addr) {
10623 				pr_info("Tracing: mapped boot instance %s at physical memory %pa of size 0x%lx\n",
10624 					name, &start, (unsigned long)size);
10625 			} else {
10626 				pr_warn("Tracing: Failed to map boot instance %s\n", name);
10627 				continue;
10628 			}
10629 		} else {
10630 			/* Only non mapped buffers have snapshot buffers */
10631 			if (IS_ENABLED(CONFIG_TRACER_MAX_TRACE))
10632 				do_allocate_snapshot(name);
10633 		}
10634 
10635 		tr = trace_array_create_systems(name, NULL, addr, size);
10636 		if (IS_ERR(tr)) {
10637 			pr_warn("Tracing: Failed to create instance buffer %s\n", curr_str);
10638 			continue;
10639 		}
10640 
10641 		if (traceoff)
10642 			tracer_tracing_off(tr);
10643 
10644 		if (traceprintk)
10645 			update_printk_trace(tr);
10646 
10647 		/*
10648 		 * If start is set, then this is a mapped buffer, and
10649 		 * cannot be deleted by user space, so keep the reference
10650 		 * to it.
10651 		 */
10652 		if (start) {
10653 			tr->flags |= TRACE_ARRAY_FL_BOOT;
10654 			tr->ref++;
10655 		}
10656 
10657 		while ((tok = strsep(&curr_str, ","))) {
10658 			early_enable_events(tr, tok, true);
10659 		}
10660 	}
10661 }
10662 
10663 __init static int tracer_alloc_buffers(void)
10664 {
10665 	int ring_buf_size;
10666 	int ret = -ENOMEM;
10667 
10668 
10669 	if (security_locked_down(LOCKDOWN_TRACEFS)) {
10670 		pr_warn("Tracing disabled due to lockdown\n");
10671 		return -EPERM;
10672 	}
10673 
10674 	/*
10675 	 * Make sure we don't accidentally add more trace options
10676 	 * than we have bits for.
10677 	 */
10678 	BUILD_BUG_ON(TRACE_ITER_LAST_BIT > TRACE_FLAGS_MAX_SIZE);
10679 
10680 	if (!alloc_cpumask_var(&tracing_buffer_mask, GFP_KERNEL))
10681 		goto out;
10682 
10683 	if (!alloc_cpumask_var(&global_trace.tracing_cpumask, GFP_KERNEL))
10684 		goto out_free_buffer_mask;
10685 
10686 	/* Only allocate trace_printk buffers if a trace_printk exists */
10687 	if (&__stop___trace_bprintk_fmt != &__start___trace_bprintk_fmt)
10688 		/* Must be called before global_trace.buffer is allocated */
10689 		trace_printk_init_buffers();
10690 
10691 	/* To save memory, keep the ring buffer size to its minimum */
10692 	if (global_trace.ring_buffer_expanded)
10693 		ring_buf_size = trace_buf_size;
10694 	else
10695 		ring_buf_size = 1;
10696 
10697 	cpumask_copy(tracing_buffer_mask, cpu_possible_mask);
10698 	cpumask_copy(global_trace.tracing_cpumask, cpu_all_mask);
10699 
10700 	raw_spin_lock_init(&global_trace.start_lock);
10701 
10702 	/*
10703 	 * The prepare callbacks allocates some memory for the ring buffer. We
10704 	 * don't free the buffer if the CPU goes down. If we were to free
10705 	 * the buffer, then the user would lose any trace that was in the
10706 	 * buffer. The memory will be removed once the "instance" is removed.
10707 	 */
10708 	ret = cpuhp_setup_state_multi(CPUHP_TRACE_RB_PREPARE,
10709 				      "trace/RB:prepare", trace_rb_cpu_prepare,
10710 				      NULL);
10711 	if (ret < 0)
10712 		goto out_free_cpumask;
10713 	/* Used for event triggers */
10714 	ret = -ENOMEM;
10715 	temp_buffer = ring_buffer_alloc(PAGE_SIZE, RB_FL_OVERWRITE);
10716 	if (!temp_buffer)
10717 		goto out_rm_hp_state;
10718 
10719 	if (trace_create_savedcmd() < 0)
10720 		goto out_free_temp_buffer;
10721 
10722 	if (!zalloc_cpumask_var(&global_trace.pipe_cpumask, GFP_KERNEL))
10723 		goto out_free_savedcmd;
10724 
10725 	/* TODO: make the number of buffers hot pluggable with CPUS */
10726 	if (allocate_trace_buffers(&global_trace, ring_buf_size) < 0) {
10727 		MEM_FAIL(1, "tracer: failed to allocate ring buffer!\n");
10728 		goto out_free_pipe_cpumask;
10729 	}
10730 	if (global_trace.buffer_disabled)
10731 		tracing_off();
10732 
10733 	if (trace_boot_clock) {
10734 		ret = tracing_set_clock(&global_trace, trace_boot_clock);
10735 		if (ret < 0)
10736 			pr_warn("Trace clock %s not defined, going back to default\n",
10737 				trace_boot_clock);
10738 	}
10739 
10740 	/*
10741 	 * register_tracer() might reference current_trace, so it
10742 	 * needs to be set before we register anything. This is
10743 	 * just a bootstrap of current_trace anyway.
10744 	 */
10745 	global_trace.current_trace = &nop_trace;
10746 
10747 	global_trace.max_lock = (arch_spinlock_t)__ARCH_SPIN_LOCK_UNLOCKED;
10748 #ifdef CONFIG_TRACER_MAX_TRACE
10749 	spin_lock_init(&global_trace.snapshot_trigger_lock);
10750 #endif
10751 	ftrace_init_global_array_ops(&global_trace);
10752 
10753 	init_trace_flags_index(&global_trace);
10754 
10755 	register_tracer(&nop_trace);
10756 
10757 	/* Function tracing may start here (via kernel command line) */
10758 	init_function_trace();
10759 
10760 	/* All seems OK, enable tracing */
10761 	tracing_disabled = 0;
10762 
10763 	atomic_notifier_chain_register(&panic_notifier_list,
10764 				       &trace_panic_notifier);
10765 
10766 	register_die_notifier(&trace_die_notifier);
10767 
10768 	global_trace.flags = TRACE_ARRAY_FL_GLOBAL;
10769 
10770 	INIT_LIST_HEAD(&global_trace.systems);
10771 	INIT_LIST_HEAD(&global_trace.events);
10772 	INIT_LIST_HEAD(&global_trace.hist_vars);
10773 	INIT_LIST_HEAD(&global_trace.err_log);
10774 	list_add(&global_trace.list, &ftrace_trace_arrays);
10775 
10776 	apply_trace_boot_options();
10777 
10778 	register_snapshot_cmd();
10779 
10780 	test_can_verify();
10781 
10782 	return 0;
10783 
10784 out_free_pipe_cpumask:
10785 	free_cpumask_var(global_trace.pipe_cpumask);
10786 out_free_savedcmd:
10787 	trace_free_saved_cmdlines_buffer();
10788 out_free_temp_buffer:
10789 	ring_buffer_free(temp_buffer);
10790 out_rm_hp_state:
10791 	cpuhp_remove_multi_state(CPUHP_TRACE_RB_PREPARE);
10792 out_free_cpumask:
10793 	free_cpumask_var(global_trace.tracing_cpumask);
10794 out_free_buffer_mask:
10795 	free_cpumask_var(tracing_buffer_mask);
10796 out:
10797 	return ret;
10798 }
10799 
10800 void __init ftrace_boot_snapshot(void)
10801 {
10802 #ifdef CONFIG_TRACER_MAX_TRACE
10803 	struct trace_array *tr;
10804 
10805 	if (!snapshot_at_boot)
10806 		return;
10807 
10808 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
10809 		if (!tr->allocated_snapshot)
10810 			continue;
10811 
10812 		tracing_snapshot_instance(tr);
10813 		trace_array_puts(tr, "** Boot snapshot taken **\n");
10814 	}
10815 #endif
10816 }
10817 
10818 void __init early_trace_init(void)
10819 {
10820 	if (tracepoint_printk) {
10821 		tracepoint_print_iter =
10822 			kzalloc(sizeof(*tracepoint_print_iter), GFP_KERNEL);
10823 		if (MEM_FAIL(!tracepoint_print_iter,
10824 			     "Failed to allocate trace iterator\n"))
10825 			tracepoint_printk = 0;
10826 		else
10827 			static_key_enable(&tracepoint_printk_key.key);
10828 	}
10829 	tracer_alloc_buffers();
10830 
10831 	init_events();
10832 }
10833 
10834 void __init trace_init(void)
10835 {
10836 	trace_event_init();
10837 
10838 	if (boot_instance_index)
10839 		enable_instances();
10840 }
10841 
10842 __init static void clear_boot_tracer(void)
10843 {
10844 	/*
10845 	 * The default tracer at boot buffer is an init section.
10846 	 * This function is called in lateinit. If we did not
10847 	 * find the boot tracer, then clear it out, to prevent
10848 	 * later registration from accessing the buffer that is
10849 	 * about to be freed.
10850 	 */
10851 	if (!default_bootup_tracer)
10852 		return;
10853 
10854 	printk(KERN_INFO "ftrace bootup tracer '%s' not registered.\n",
10855 	       default_bootup_tracer);
10856 	default_bootup_tracer = NULL;
10857 }
10858 
10859 #ifdef CONFIG_HAVE_UNSTABLE_SCHED_CLOCK
10860 __init static void tracing_set_default_clock(void)
10861 {
10862 	/* sched_clock_stable() is determined in late_initcall */
10863 	if (!trace_boot_clock && !sched_clock_stable()) {
10864 		if (security_locked_down(LOCKDOWN_TRACEFS)) {
10865 			pr_warn("Can not set tracing clock due to lockdown\n");
10866 			return;
10867 		}
10868 
10869 		printk(KERN_WARNING
10870 		       "Unstable clock detected, switching default tracing clock to \"global\"\n"
10871 		       "If you want to keep using the local clock, then add:\n"
10872 		       "  \"trace_clock=local\"\n"
10873 		       "on the kernel command line\n");
10874 		tracing_set_clock(&global_trace, "global");
10875 	}
10876 }
10877 #else
10878 static inline void tracing_set_default_clock(void) { }
10879 #endif
10880 
10881 __init static int late_trace_init(void)
10882 {
10883 	if (tracepoint_printk && tracepoint_printk_stop_on_boot) {
10884 		static_key_disable(&tracepoint_printk_key.key);
10885 		tracepoint_printk = 0;
10886 	}
10887 
10888 	tracing_set_default_clock();
10889 	clear_boot_tracer();
10890 	return 0;
10891 }
10892 
10893 late_initcall_sync(late_trace_init);
10894