xref: /linux/kernel/trace/trace_events.c (revision 80c3e28528ff9f269937fcfe73895213a2e14905)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * event tracer
4  *
5  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
6  *
7  *  - Added format output of fields of the trace point.
8  *    This was based on work by Tom Zanussi <tzanussi@gmail.com>.
9  *
10  */
11 
12 #define pr_fmt(fmt) fmt
13 
14 #include <linux/workqueue.h>
15 #include <linux/security.h>
16 #include <linux/spinlock.h>
17 #include <linux/kthread.h>
18 #include <linux/tracefs.h>
19 #include <linux/uaccess.h>
20 #include <linux/module.h>
21 #include <linux/ctype.h>
22 #include <linux/sort.h>
23 #include <linux/slab.h>
24 #include <linux/delay.h>
25 
26 #include <trace/events/sched.h>
27 #include <trace/syscall.h>
28 
29 #include <asm/setup.h>
30 
31 #include "trace_output.h"
32 
33 #undef TRACE_SYSTEM
34 #define TRACE_SYSTEM "TRACE_SYSTEM"
35 
36 DEFINE_MUTEX(event_mutex);
37 
38 LIST_HEAD(ftrace_events);
39 static LIST_HEAD(ftrace_generic_fields);
40 static LIST_HEAD(ftrace_common_fields);
41 static bool eventdir_initialized;
42 
43 static LIST_HEAD(module_strings);
44 
45 struct module_string {
46 	struct list_head	next;
47 	struct module		*module;
48 	char			*str;
49 };
50 
51 #define GFP_TRACE (GFP_KERNEL | __GFP_ZERO)
52 
53 static struct kmem_cache *field_cachep;
54 static struct kmem_cache *file_cachep;
55 
56 static inline int system_refcount(struct event_subsystem *system)
57 {
58 	return system->ref_count;
59 }
60 
61 static int system_refcount_inc(struct event_subsystem *system)
62 {
63 	return system->ref_count++;
64 }
65 
66 static int system_refcount_dec(struct event_subsystem *system)
67 {
68 	return --system->ref_count;
69 }
70 
71 /* Double loops; do not use break, only a goto can exit both loops */
72 #define do_for_each_event_file(tr, file)			\
73 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {	\
74 		list_for_each_entry(file, &tr->events, list)
75 
76 #define do_for_each_event_file_safe(tr, file)			\
77 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {	\
78 		struct trace_event_file *___n;				\
79 		list_for_each_entry_safe(file, ___n, &tr->events, list)
80 
81 #define while_for_each_event_file()		\
82 	}
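/*
 * Typical usage (a minimal sketch; see trace_event_enable_cmd_record()
 * further down for a real caller). The trailing macro supplies the
 * closing brace of the outer loop:
 *
 *	do_for_each_event_file(tr, file) {
 *		if (!(file->flags & EVENT_FILE_FL_ENABLED))
 *			continue;
 *		...
 *	} while_for_each_event_file();
 */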
83 
84 static struct ftrace_event_field *
85 __find_event_field(struct list_head *head, const char *name)
86 {
87 	struct ftrace_event_field *field;
88 
89 	list_for_each_entry(field, head, link) {
90 		if (!strcmp(field->name, name))
91 			return field;
92 	}
93 
94 	return NULL;
95 }
96 
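/*
 * trace_find_event_field - look up an event field by name
 *
 * Search the event's own fields first, then the generic pseudo-fields
 * (CPU, COMM, STACKTRACE, ...) and finally the common fields shared by
 * all events. Returns NULL if no field with that name exists.
 */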
97 struct ftrace_event_field *
98 trace_find_event_field(struct trace_event_call *call, char *name)
99 {
100 	struct ftrace_event_field *field;
101 	struct list_head *head;
102 
103 	head = trace_get_fields(call);
104 	field = __find_event_field(head, name);
105 	if (field)
106 		return field;
107 
108 	field = __find_event_field(&ftrace_generic_fields, name);
109 	if (field)
110 		return field;
111 
112 	return __find_event_field(&ftrace_common_fields, name);
113 }
114 
115 static int __trace_define_field(struct list_head *head, const char *type,
116 				const char *name, int offset, int size,
117 				int is_signed, int filter_type, int len,
118 				int need_test)
119 {
120 	struct ftrace_event_field *field;
121 
122 	field = kmem_cache_alloc(field_cachep, GFP_TRACE);
123 	if (!field)
124 		return -ENOMEM;
125 
126 	field->name = name;
127 	field->type = type;
128 
129 	if (filter_type == FILTER_OTHER)
130 		field->filter_type = filter_assign_type(type);
131 	else
132 		field->filter_type = filter_type;
133 
134 	field->offset = offset;
135 	field->size = size;
136 	field->is_signed = is_signed;
137 	field->needs_test = need_test;
138 	field->len = len;
139 
140 	list_add(&field->link, head);
141 
142 	return 0;
143 }
144 
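/*
 * trace_define_field - describe one field of an event
 *
 * Records the type, name, offset, size and signedness of a field so the
 * "format" file and the event filters can use it. Returns 0 on success
 * or -ENOMEM if the field descriptor could not be allocated.
 */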
145 int trace_define_field(struct trace_event_call *call, const char *type,
146 		       const char *name, int offset, int size, int is_signed,
147 		       int filter_type)
148 {
149 	struct list_head *head;
150 
151 	if (WARN_ON(!call->class))
152 		return 0;
153 
154 	head = trace_get_fields(call);
155 	return __trace_define_field(head, type, name, offset, size,
156 				    is_signed, filter_type, 0, 0);
157 }
158 EXPORT_SYMBOL_GPL(trace_define_field);
159 
160 static int trace_define_field_ext(struct trace_event_call *call, const char *type,
161 		       const char *name, int offset, int size, int is_signed,
162 		       int filter_type, int len, int need_test)
163 {
164 	struct list_head *head;
165 
166 	if (WARN_ON(!call->class))
167 		return 0;
168 
169 	head = trace_get_fields(call);
170 	return __trace_define_field(head, type, name, offset, size,
171 				    is_signed, filter_type, len, need_test);
172 }
173 
174 #define __generic_field(type, item, filter_type)			\
175 	ret = __trace_define_field(&ftrace_generic_fields, #type,	\
176 				   #item, 0, 0, is_signed_type(type),	\
177 				   filter_type, 0, 0);			\
178 	if (ret)							\
179 		return ret;
180 
181 #define __common_field(type, item)					\
182 	ret = __trace_define_field(&ftrace_common_fields, #type,	\
183 				   "common_" #item,			\
184 				   offsetof(typeof(ent), item),		\
185 				   sizeof(ent.item),			\
186 				   is_signed_type(type), FILTER_OTHER,	\
187 				   0, 0);				\
188 	if (ret)							\
189 		return ret;
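/*
 * Both helpers above expand to a __trace_define_field() call plus an
 * early return on error. For example (sketch of the expansion):
 *
 *	__common_field(int, pid)
 *	  => ret = __trace_define_field(&ftrace_common_fields, "int",
 *				"common_pid", offsetof(typeof(ent), pid),
 *				sizeof(ent.pid), is_signed_type(int),
 *				FILTER_OTHER, 0, 0);
 */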
190 
191 static int trace_define_generic_fields(void)
192 {
193 	int ret;
194 
195 	__generic_field(int, CPU, FILTER_CPU);
196 	__generic_field(int, cpu, FILTER_CPU);
197 	__generic_field(int, common_cpu, FILTER_CPU);
198 	__generic_field(char *, COMM, FILTER_COMM);
199 	__generic_field(char *, comm, FILTER_COMM);
200 	__generic_field(char *, stacktrace, FILTER_STACKTRACE);
201 	__generic_field(char *, STACKTRACE, FILTER_STACKTRACE);
202 
203 	return ret;
204 }
205 
206 static int trace_define_common_fields(void)
207 {
208 	int ret;
209 	struct trace_entry ent;
210 
211 	__common_field(unsigned short, type);
212 	__common_field(unsigned char, flags);
213 	/* Holds both preempt_count and migrate_disable */
214 	__common_field(unsigned char, preempt_count);
215 	__common_field(int, pid);
216 
217 	return ret;
218 }
219 
220 static void trace_destroy_fields(struct trace_event_call *call)
221 {
222 	struct ftrace_event_field *field, *next;
223 	struct list_head *head;
224 
225 	head = trace_get_fields(call);
226 	list_for_each_entry_safe(field, next, head, link) {
227 		list_del(&field->link);
228 		kmem_cache_free(field_cachep, field);
229 	}
230 }
231 
232 /*
233  * run-time version of trace_event_get_offsets_<call>() that returns the last
234  * accessible offset of trace fields excluding __dynamic_array bytes
235  */
236 int trace_event_get_offsets(struct trace_event_call *call)
237 {
238 	struct ftrace_event_field *tail;
239 	struct list_head *head;
240 
241 	head = trace_get_fields(call);
242 	/*
243 	 * head->next points to the last field with the largest offset,
244 	 * since it was added last by trace_define_field()
245 	 */
246 	tail = list_first_entry(head, struct ftrace_event_field, link);
247 	return tail->offset + tail->size;
248 }
249 
250 
251 static struct trace_event_fields *find_event_field(const char *fmt,
252 						   struct trace_event_call *call)
253 {
254 	struct trace_event_fields *field = call->class->fields_array;
255 	const char *p = fmt;
256 	int len;
257 
258 	if (!(len = str_has_prefix(fmt, "REC->")))
259 		return NULL;
260 	fmt += len;
261 	for (p = fmt; *p; p++) {
262 		if (!isalnum(*p) && *p != '_')
263 			break;
264 	}
265 	len = p - fmt;
266 
267 	for (; field->type; field++) {
268 		if (strncmp(field->name, fmt, len) || field->name[len])
269 			continue;
270 
271 		return field;
272 	}
273 	return NULL;
274 }
275 
276 /*
277  * Check if the referenced field is an array and return true,
278  * as arrays are OK to dereference.
279  */
280 static bool test_field(const char *fmt, struct trace_event_call *call)
281 {
282 	struct trace_event_fields *field;
283 
284 	field = find_event_field(fmt, call);
285 	if (!field)
286 		return false;
287 
288 	/* Only arrays (data stored within the event itself) are OK to dereference. */
289 	return strchr(field->type, '[') != NULL;
290 }
291 
292 /* Look for a string within an argument */
293 static bool find_print_string(const char *arg, const char *str, const char *end)
294 {
295 	const char *r;
296 
297 	r = strstr(arg, str);
298 	return r && r < end;
299 }
300 
301 /* Return true if the argument pointer is safe */
302 static bool process_pointer(const char *fmt, int len, struct trace_event_call *call)
303 {
304 	const char *r, *e, *a;
305 
306 	e = fmt + len;
307 
308 	/* Find the REC-> in the argument */
309 	r = strstr(fmt, "REC->");
310 	if (r && r < e) {
311 		/*
312 		 * Addresses of events on the buffer, or an array on the buffer,
313 		 * are OK to dereference. There are ways to fool this, but it is
314 		 * meant to catch common mistakes, not malicious code.
315 		 */
316 		a = strchr(fmt, '&');
317 		if ((a && (a < r)) || test_field(r, call))
318 			return true;
319 	} else if (find_print_string(fmt, "__get_dynamic_array(", e)) {
320 		return true;
321 	} else if (find_print_string(fmt, "__get_rel_dynamic_array(", e)) {
322 		return true;
323 	} else if (find_print_string(fmt, "__get_dynamic_array_len(", e)) {
324 		return true;
325 	} else if (find_print_string(fmt, "__get_rel_dynamic_array_len(", e)) {
326 		return true;
327 	} else if (find_print_string(fmt, "__get_sockaddr(", e)) {
328 		return true;
329 	} else if (find_print_string(fmt, "__get_rel_sockaddr(", e)) {
330 		return true;
331 	}
332 	return false;
333 }
334 
335 /* Return true if the string is safe */
336 static bool process_string(const char *fmt, int len, struct trace_event_call *call)
337 {
338 	struct trace_event_fields *field;
339 	const char *r, *e, *s;
340 
341 	e = fmt + len;
342 
343 	/*
344 	 * There are several helper functions that return strings.
345 	 * If the argument contains a function call, assume it is valid.
346 	 * The argument is considered to contain a function call if an
347 	 *   alphanumeric character or '_' appears just before a parenthesis.
348 	 */
349 	s = fmt;
350 	do {
351 		r = strstr(s, "(");
352 		if (!r || r >= e)
353 			break;
354 		for (int i = 1; r - i >= s; i++) {
355 			char ch = *(r - i);
356 			if (isspace(ch))
357 				continue;
358 			if (isalnum(ch) || ch == '_')
359 				return true;
360 			/* Anything else, this isn't a function */
361 			break;
362 		}
363 		/* A function could be wrapped in parentheses, try the next one */
364 		s = r + 1;
365 	} while (s < e);
366 
367 	/*
368 	 * If there are any string literals in the argument, consider this arg
369 	 * OK, as it could be: REC->field ? "foo" : "bar", and we don't want to
370 	 * get into verifying that logic here.
371 	 */
372 	if (find_print_string(fmt, "\"", e))
373 		return true;
374 
375 	/* Dereferenced strings are also valid like any other pointer */
376 	if (process_pointer(fmt, len, call))
377 		return true;
378 
379 	/* Make sure the field is found */
380 	field = find_event_field(fmt, call);
381 	if (!field)
382 		return false;
383 
384 	/* Test this field's string before printing the event */
385 	call->flags |= TRACE_EVENT_FL_TEST_STR;
386 	field->needs_test = 1;
387 
388 	return true;
389 }
390 
391 /*
392  * Examine the print fmt of the event looking for unsafe %p* dereferences
393  * of pointers that are recorded in the trace event but may have been
394  * freed by the time the event is printed. Dereferencing a pointer is OK
395  * if it points into the event data itself.
396  */
397 static void test_event_printk(struct trace_event_call *call)
398 {
399 	u64 dereference_flags = 0;
400 	u64 string_flags = 0;
401 	bool first = true;
402 	const char *fmt;
403 	int parens = 0;
404 	char in_quote = 0;
405 	int start_arg = 0;
406 	int arg = 0;
407 	int i, e;
408 
409 	fmt = call->print_fmt;
410 
411 	if (!fmt)
412 		return;
413 
414 	for (i = 0; fmt[i]; i++) {
415 		switch (fmt[i]) {
416 		case '\\':
417 			i++;
418 			if (!fmt[i])
419 				return;
420 			continue;
421 		case '"':
422 		case '\'':
423 			/*
424 			 * The print fmt starts with a format string that
425 			 * is processed first to find any %p* usage. The
426 			 * arguments that follow the format string are then
427 			 * checked to see whether each dereferenced %p*
428 			 * usage is actually safe.
429 			 */
430 			if (first) {
431 				if (fmt[i] == '\'')
432 					continue;
433 				if (in_quote) {
434 					arg = 0;
435 					first = false;
436 					/*
437 					 * If there were no %p* uses,
438 					 * the fmt is OK.
439 					 */
440 					if (!dereference_flags)
441 						return;
442 				}
443 			}
444 			if (in_quote) {
445 				if (in_quote == fmt[i])
446 					in_quote = 0;
447 			} else {
448 				in_quote = fmt[i];
449 			}
450 			continue;
451 		case '%':
452 			if (!first || !in_quote)
453 				continue;
454 			i++;
455 			if (!fmt[i])
456 				return;
457 			switch (fmt[i]) {
458 			case '%':
459 				continue;
460 			case 'p':
461 				/* Find dereferencing fields */
462 				switch (fmt[i + 1]) {
463 				case 'B': case 'R': case 'r':
464 				case 'b': case 'M': case 'm':
465 				case 'I': case 'i': case 'E':
466 				case 'U': case 'V': case 'N':
467 				case 'a': case 'd': case 'D':
468 				case 'g': case 't': case 'C':
469 				case 'O': case 'f':
470 					if (WARN_ONCE(arg == 63,
471 						      "Too many args for event: %s",
472 						      trace_event_name(call)))
473 						return;
474 					dereference_flags |= 1ULL << arg;
475 				}
476 				break;
477 			default:
478 			{
479 				bool star = false;
480 				int j;
481 
482 				/* Increment arg if %*s exists. */
483 				for (j = 0; fmt[i + j]; j++) {
484 					if (isdigit(fmt[i + j]) ||
485 					    fmt[i + j] == '.')
486 						continue;
487 					if (fmt[i + j] == '*') {
488 						star = true;
489 						continue;
490 					}
491 					if (fmt[i + j] == 's') {
492 						if (star)
493 							arg++;
494 						if (WARN_ONCE(arg == 63,
495 							      "Too many args for event: %s",
496 							      trace_event_name(call)))
497 							return;
498 						dereference_flags |= 1ULL << arg;
499 						string_flags |= 1ULL << arg;
500 					}
501 					break;
502 				}
503 				break;
504 			} /* default */
505 
506 			} /* switch */
507 			arg++;
508 			continue;
509 		case '(':
510 			if (in_quote)
511 				continue;
512 			parens++;
513 			continue;
514 		case ')':
515 			if (in_quote)
516 				continue;
517 			parens--;
518 			if (WARN_ONCE(parens < 0,
519 				      "Paren mismatch for event: %s\narg='%s'\n%*s",
520 				      trace_event_name(call),
521 				      fmt + start_arg,
522 				      (i - start_arg) + 5, "^"))
523 				return;
524 			continue;
525 		case ',':
526 			if (in_quote || parens)
527 				continue;
528 			e = i;
529 			i++;
530 			while (isspace(fmt[i]))
531 				i++;
532 
533 			/*
534 			 * If start_arg is zero, then this is the start of the
535 			 * first argument. The processing of the argument happens
536 			 * when the end of the argument is found, as it needs to
537 			 * handle parentheses and such.
538 			 */
539 			if (!start_arg) {
540 				start_arg = i;
541 				/* Balance out the i++ in the for loop */
542 				i--;
543 				continue;
544 			}
545 
546 			if (dereference_flags & (1ULL << arg)) {
547 				if (string_flags & (1ULL << arg)) {
548 					if (process_string(fmt + start_arg, e - start_arg, call))
549 						dereference_flags &= ~(1ULL << arg);
550 				} else if (process_pointer(fmt + start_arg, e - start_arg, call))
551 					dereference_flags &= ~(1ULL << arg);
552 			}
553 
554 			start_arg = i;
555 			arg++;
556 			/* Balance out the i++ in the for loop */
557 			i--;
558 		}
559 	}
560 
561 	if (dereference_flags & (1ULL << arg)) {
562 		if (string_flags & (1ULL << arg)) {
563 			if (process_string(fmt + start_arg, i - start_arg, call))
564 				dereference_flags &= ~(1ULL << arg);
565 		} else if (process_pointer(fmt + start_arg, i - start_arg, call))
566 			dereference_flags &= ~(1ULL << arg);
567 	}
568 
569 	/*
570 	 * If you triggered the below warning, the reported trace event
571 	 * uses an unsafe %p* pointer dereference. As the data stored
572 	 * at the trace event time may no longer exist when the trace
573 	 * event is printed, dereferencing to the original source is
574 	 * unsafe. The source of the dereference must be copied into the
575 	 * event itself, and the dereference must access the copy instead.
576 	 */
577 	if (WARN_ON_ONCE(dereference_flags)) {
578 		arg = 1;
579 		while (!(dereference_flags & 1)) {
580 			dereference_flags >>= 1;
581 			arg++;
582 		}
583 		pr_warn("event %s has unsafe dereference of argument %d\n",
584 			trace_event_name(call), arg);
585 		pr_warn("print_fmt: %s\n", fmt);
586 	}
587 }
588 
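/*
 * trace_event_raw_init - common initialization for a trace event
 *
 * Registers the event to obtain its type id and vets the print format
 * with test_event_printk(). Returns 0 on success or -ENODEV if no id
 * could be assigned.
 */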
589 int trace_event_raw_init(struct trace_event_call *call)
590 {
591 	int id;
592 
593 	id = register_trace_event(&call->event);
594 	if (!id)
595 		return -ENODEV;
596 
597 	test_event_printk(call);
598 
599 	return 0;
600 }
601 EXPORT_SYMBOL_GPL(trace_event_raw_init);
602 
603 bool trace_event_ignore_this_pid(struct trace_event_file *trace_file)
604 {
605 	struct trace_array *tr = trace_file->tr;
606 	struct trace_array_cpu *data;
607 	struct trace_pid_list *no_pid_list;
608 	struct trace_pid_list *pid_list;
609 
610 	pid_list = rcu_dereference_raw(tr->filtered_pids);
611 	no_pid_list = rcu_dereference_raw(tr->filtered_no_pids);
612 
613 	if (!pid_list && !no_pid_list)
614 		return false;
615 
616 	data = this_cpu_ptr(tr->array_buffer.data);
617 
618 	return data->ignore_pid;
619 }
620 EXPORT_SYMBOL_GPL(trace_event_ignore_this_pid);
621 
622 void *trace_event_buffer_reserve(struct trace_event_buffer *fbuffer,
623 				 struct trace_event_file *trace_file,
624 				 unsigned long len)
625 {
626 	struct trace_event_call *event_call = trace_file->event_call;
627 
628 	if ((trace_file->flags & EVENT_FILE_FL_PID_FILTER) &&
629 	    trace_event_ignore_this_pid(trace_file))
630 		return NULL;
631 
632 	/*
633 	 * If CONFIG_PREEMPTION is enabled, then the tracepoint itself disables
634 	 * preemption (adding one to the preempt_count). Since we are
635 	 * interested in the preempt_count at the time the tracepoint was
636 	 * hit, we need to subtract one to offset the increment.
637 	 */
638 	fbuffer->trace_ctx = tracing_gen_ctx_dec();
639 	fbuffer->trace_file = trace_file;
640 
641 	fbuffer->event =
642 		trace_event_buffer_lock_reserve(&fbuffer->buffer, trace_file,
643 						event_call->event.type, len,
644 						fbuffer->trace_ctx);
645 	if (!fbuffer->event)
646 		return NULL;
647 
648 	fbuffer->regs = NULL;
649 	fbuffer->entry = ring_buffer_event_data(fbuffer->event);
650 	return fbuffer->entry;
651 }
652 EXPORT_SYMBOL_GPL(trace_event_buffer_reserve);
653 
654 int trace_event_reg(struct trace_event_call *call,
655 		    enum trace_reg type, void *data)
656 {
657 	struct trace_event_file *file = data;
658 
659 	WARN_ON(!(call->flags & TRACE_EVENT_FL_TRACEPOINT));
660 	switch (type) {
661 	case TRACE_REG_REGISTER:
662 		return tracepoint_probe_register(call->tp,
663 						 call->class->probe,
664 						 file);
665 	case TRACE_REG_UNREGISTER:
666 		tracepoint_probe_unregister(call->tp,
667 					    call->class->probe,
668 					    file);
669 		return 0;
670 
671 #ifdef CONFIG_PERF_EVENTS
672 	case TRACE_REG_PERF_REGISTER:
673 		return tracepoint_probe_register(call->tp,
674 						 call->class->perf_probe,
675 						 call);
676 	case TRACE_REG_PERF_UNREGISTER:
677 		tracepoint_probe_unregister(call->tp,
678 					    call->class->perf_probe,
679 					    call);
680 		return 0;
681 	case TRACE_REG_PERF_OPEN:
682 	case TRACE_REG_PERF_CLOSE:
683 	case TRACE_REG_PERF_ADD:
684 	case TRACE_REG_PERF_DEL:
685 		return 0;
686 #endif
687 	}
688 	return 0;
689 }
690 EXPORT_SYMBOL_GPL(trace_event_reg);
691 
692 void trace_event_enable_cmd_record(bool enable)
693 {
694 	struct trace_event_file *file;
695 	struct trace_array *tr;
696 
697 	lockdep_assert_held(&event_mutex);
698 
699 	do_for_each_event_file(tr, file) {
700 
701 		if (!(file->flags & EVENT_FILE_FL_ENABLED))
702 			continue;
703 
704 		if (enable) {
705 			tracing_start_cmdline_record();
706 			set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
707 		} else {
708 			tracing_stop_cmdline_record();
709 			clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
710 		}
711 	} while_for_each_event_file();
712 }
713 
714 void trace_event_enable_tgid_record(bool enable)
715 {
716 	struct trace_event_file *file;
717 	struct trace_array *tr;
718 
719 	lockdep_assert_held(&event_mutex);
720 
721 	do_for_each_event_file(tr, file) {
722 		if (!(file->flags & EVENT_FILE_FL_ENABLED))
723 			continue;
724 
725 		if (enable) {
726 			tracing_start_tgid_record();
727 			set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
728 		} else {
729 			tracing_stop_tgid_record();
730 			clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT,
731 				  &file->flags);
732 		}
733 	} while_for_each_event_file();
734 }
735 
736 static int __ftrace_event_enable_disable(struct trace_event_file *file,
737 					 int enable, int soft_disable)
738 {
739 	struct trace_event_call *call = file->event_call;
740 	struct trace_array *tr = file->tr;
741 	int ret = 0;
742 	int disable;
743 
744 	switch (enable) {
745 	case 0:
746 		/*
747 		 * When soft_disable is set and enable is cleared, the sm_ref
748 		 * reference counter is decremented. If it reaches 0, we want
749 		 * to clear the SOFT_DISABLED flag but leave the event in the
750 		 * state that it was. That is, if the event was enabled and
751 		 * SOFT_DISABLED isn't set, then do nothing. But if SOFT_DISABLED
752 		 * is set we do not want the event to be enabled before we
753 		 * clear the bit.
754 		 *
755 		 * When soft_disable is not set but the SOFT_MODE flag is,
756 		 * we do nothing. Do not disable the tracepoint, otherwise
757 		 * "soft enable"s (clearing the SOFT_DISABLED bit) won't work.
758 		 */
759 		if (soft_disable) {
760 			if (atomic_dec_return(&file->sm_ref) > 0)
761 				break;
762 			disable = file->flags & EVENT_FILE_FL_SOFT_DISABLED;
763 			clear_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
764 			/* Disable use of trace_buffered_event */
765 			trace_buffered_event_disable();
766 		} else
767 			disable = !(file->flags & EVENT_FILE_FL_SOFT_MODE);
768 
769 		if (disable && (file->flags & EVENT_FILE_FL_ENABLED)) {
770 			clear_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
771 			if (file->flags & EVENT_FILE_FL_RECORDED_CMD) {
772 				tracing_stop_cmdline_record();
773 				clear_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
774 			}
775 
776 			if (file->flags & EVENT_FILE_FL_RECORDED_TGID) {
777 				tracing_stop_tgid_record();
778 				clear_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
779 			}
780 
781 			call->class->reg(call, TRACE_REG_UNREGISTER, file);
782 		}
783 		/* If in SOFT_MODE, just set the SOFT_DISABLED_BIT, else clear it */
784 		if (file->flags & EVENT_FILE_FL_SOFT_MODE)
785 			set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
786 		else
787 			clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
788 		break;
789 	case 1:
790 		/*
791 		 * When soft_disable is set and enable is set, we want to
792 		 * register the tracepoint for the event, but leave the event
793 		 * as is. That means, if the event was already enabled, we do
794 		 * nothing (but set SOFT_MODE). If the event is disabled, we
795 		 * set SOFT_DISABLED before enabling the event tracepoint, so
796 		 * it still seems to be disabled.
797 		 */
798 		if (!soft_disable)
799 			clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
800 		else {
801 			if (atomic_inc_return(&file->sm_ref) > 1)
802 				break;
803 			set_bit(EVENT_FILE_FL_SOFT_MODE_BIT, &file->flags);
804 			/* Enable use of trace_buffered_event */
805 			trace_buffered_event_enable();
806 		}
807 
808 		if (!(file->flags & EVENT_FILE_FL_ENABLED)) {
809 			bool cmd = false, tgid = false;
810 
811 			/* Keep the event disabled, when going to SOFT_MODE. */
812 			if (soft_disable)
813 				set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &file->flags);
814 
815 			if (tr->trace_flags & TRACE_ITER_RECORD_CMD) {
816 				cmd = true;
817 				tracing_start_cmdline_record();
818 				set_bit(EVENT_FILE_FL_RECORDED_CMD_BIT, &file->flags);
819 			}
820 
821 			if (tr->trace_flags & TRACE_ITER_RECORD_TGID) {
822 				tgid = true;
823 				tracing_start_tgid_record();
824 				set_bit(EVENT_FILE_FL_RECORDED_TGID_BIT, &file->flags);
825 			}
826 
827 			ret = call->class->reg(call, TRACE_REG_REGISTER, file);
828 			if (ret) {
829 				if (cmd)
830 					tracing_stop_cmdline_record();
831 				if (tgid)
832 					tracing_stop_tgid_record();
833 				pr_info("event trace: Could not enable event %s\n",
834 					trace_event_name(call));
835 				break;
836 			}
837 			set_bit(EVENT_FILE_FL_ENABLED_BIT, &file->flags);
838 
839 			/* WAS_ENABLED gets set but never cleared. */
840 			set_bit(EVENT_FILE_FL_WAS_ENABLED_BIT, &file->flags);
841 		}
842 		break;
843 	}
844 
845 	return ret;
846 }
847 
848 int trace_event_enable_disable(struct trace_event_file *file,
849 			       int enable, int soft_disable)
850 {
851 	return __ftrace_event_enable_disable(file, enable, soft_disable);
852 }
853 
854 static int ftrace_event_enable_disable(struct trace_event_file *file,
855 				       int enable)
856 {
857 	return __ftrace_event_enable_disable(file, enable, 0);
858 }
859 
860 static void ftrace_clear_events(struct trace_array *tr)
861 {
862 	struct trace_event_file *file;
863 
864 	mutex_lock(&event_mutex);
865 	list_for_each_entry(file, &tr->events, list) {
866 		ftrace_event_enable_disable(file, 0);
867 	}
868 	mutex_unlock(&event_mutex);
869 }
870 
871 static void
872 event_filter_pid_sched_process_exit(void *data, struct task_struct *task)
873 {
874 	struct trace_pid_list *pid_list;
875 	struct trace_array *tr = data;
876 
877 	pid_list = rcu_dereference_raw(tr->filtered_pids);
878 	trace_filter_add_remove_task(pid_list, NULL, task);
879 
880 	pid_list = rcu_dereference_raw(tr->filtered_no_pids);
881 	trace_filter_add_remove_task(pid_list, NULL, task);
882 }
883 
884 static void
885 event_filter_pid_sched_process_fork(void *data,
886 				    struct task_struct *self,
887 				    struct task_struct *task)
888 {
889 	struct trace_pid_list *pid_list;
890 	struct trace_array *tr = data;
891 
892 	pid_list = rcu_dereference_sched(tr->filtered_pids);
893 	trace_filter_add_remove_task(pid_list, self, task);
894 
895 	pid_list = rcu_dereference_sched(tr->filtered_no_pids);
896 	trace_filter_add_remove_task(pid_list, self, task);
897 }
898 
899 void trace_event_follow_fork(struct trace_array *tr, bool enable)
900 {
901 	if (enable) {
902 		register_trace_prio_sched_process_fork(event_filter_pid_sched_process_fork,
903 						       tr, INT_MIN);
904 		register_trace_prio_sched_process_free(event_filter_pid_sched_process_exit,
905 						       tr, INT_MAX);
906 	} else {
907 		unregister_trace_sched_process_fork(event_filter_pid_sched_process_fork,
908 						    tr);
909 		unregister_trace_sched_process_free(event_filter_pid_sched_process_exit,
910 						    tr);
911 	}
912 }
913 
914 static void
915 event_filter_pid_sched_switch_probe_pre(void *data, bool preempt,
916 					struct task_struct *prev,
917 					struct task_struct *next,
918 					unsigned int prev_state)
919 {
920 	struct trace_array *tr = data;
921 	struct trace_pid_list *no_pid_list;
922 	struct trace_pid_list *pid_list;
923 	bool ret;
924 
925 	pid_list = rcu_dereference_sched(tr->filtered_pids);
926 	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
927 
928 	/*
929 	 * Sched switch is funny, as we only want to ignore it
930 	 * in the notrace case if both prev and next should be ignored.
931 	 */
932 	ret = trace_ignore_this_task(NULL, no_pid_list, prev) &&
933 		trace_ignore_this_task(NULL, no_pid_list, next);
934 
935 	this_cpu_write(tr->array_buffer.data->ignore_pid, ret ||
936 		       (trace_ignore_this_task(pid_list, NULL, prev) &&
937 			trace_ignore_this_task(pid_list, NULL, next)));
938 }
939 
940 static void
941 event_filter_pid_sched_switch_probe_post(void *data, bool preempt,
942 					 struct task_struct *prev,
943 					 struct task_struct *next,
944 					 unsigned int prev_state)
945 {
946 	struct trace_array *tr = data;
947 	struct trace_pid_list *no_pid_list;
948 	struct trace_pid_list *pid_list;
949 
950 	pid_list = rcu_dereference_sched(tr->filtered_pids);
951 	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
952 
953 	this_cpu_write(tr->array_buffer.data->ignore_pid,
954 		       trace_ignore_this_task(pid_list, no_pid_list, next));
955 }
956 
957 static void
958 event_filter_pid_sched_wakeup_probe_pre(void *data, struct task_struct *task)
959 {
960 	struct trace_array *tr = data;
961 	struct trace_pid_list *no_pid_list;
962 	struct trace_pid_list *pid_list;
963 
964 	/* Nothing to do if we are already tracing */
965 	if (!this_cpu_read(tr->array_buffer.data->ignore_pid))
966 		return;
967 
968 	pid_list = rcu_dereference_sched(tr->filtered_pids);
969 	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
970 
971 	this_cpu_write(tr->array_buffer.data->ignore_pid,
972 		       trace_ignore_this_task(pid_list, no_pid_list, task));
973 }
974 
975 static void
976 event_filter_pid_sched_wakeup_probe_post(void *data, struct task_struct *task)
977 {
978 	struct trace_array *tr = data;
979 	struct trace_pid_list *no_pid_list;
980 	struct trace_pid_list *pid_list;
981 
982 	/* Nothing to do if we are not tracing */
983 	if (this_cpu_read(tr->array_buffer.data->ignore_pid))
984 		return;
985 
986 	pid_list = rcu_dereference_sched(tr->filtered_pids);
987 	no_pid_list = rcu_dereference_sched(tr->filtered_no_pids);
988 
989 	/* Set tracing if current is enabled */
990 	this_cpu_write(tr->array_buffer.data->ignore_pid,
991 		       trace_ignore_this_task(pid_list, no_pid_list, current));
992 }
993 
994 static void unregister_pid_events(struct trace_array *tr)
995 {
996 	unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_pre, tr);
997 	unregister_trace_sched_switch(event_filter_pid_sched_switch_probe_post, tr);
998 
999 	unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre, tr);
1000 	unregister_trace_sched_wakeup(event_filter_pid_sched_wakeup_probe_post, tr);
1001 
1002 	unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre, tr);
1003 	unregister_trace_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post, tr);
1004 
1005 	unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_pre, tr);
1006 	unregister_trace_sched_waking(event_filter_pid_sched_wakeup_probe_post, tr);
1007 }
1008 
1009 static void __ftrace_clear_event_pids(struct trace_array *tr, int type)
1010 {
1011 	struct trace_pid_list *pid_list;
1012 	struct trace_pid_list *no_pid_list;
1013 	struct trace_event_file *file;
1014 	int cpu;
1015 
1016 	pid_list = rcu_dereference_protected(tr->filtered_pids,
1017 					     lockdep_is_held(&event_mutex));
1018 	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
1019 					     lockdep_is_held(&event_mutex));
1020 
1021 	/* Make sure there's something to do */
1022 	if (!pid_type_enabled(type, pid_list, no_pid_list))
1023 		return;
1024 
1025 	if (!still_need_pid_events(type, pid_list, no_pid_list)) {
1026 		unregister_pid_events(tr);
1027 
1028 		list_for_each_entry(file, &tr->events, list) {
1029 			clear_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
1030 		}
1031 
1032 		for_each_possible_cpu(cpu)
1033 			per_cpu_ptr(tr->array_buffer.data, cpu)->ignore_pid = false;
1034 	}
1035 
1036 	if (type & TRACE_PIDS)
1037 		rcu_assign_pointer(tr->filtered_pids, NULL);
1038 
1039 	if (type & TRACE_NO_PIDS)
1040 		rcu_assign_pointer(tr->filtered_no_pids, NULL);
1041 
1042 	/* Wait till all users are no longer using pid filtering */
1043 	tracepoint_synchronize_unregister();
1044 
1045 	if ((type & TRACE_PIDS) && pid_list)
1046 		trace_pid_list_free(pid_list);
1047 
1048 	if ((type & TRACE_NO_PIDS) && no_pid_list)
1049 		trace_pid_list_free(no_pid_list);
1050 }
1051 
1052 static void ftrace_clear_event_pids(struct trace_array *tr, int type)
1053 {
1054 	mutex_lock(&event_mutex);
1055 	__ftrace_clear_event_pids(tr, type);
1056 	mutex_unlock(&event_mutex);
1057 }
1058 
1059 static void __put_system(struct event_subsystem *system)
1060 {
1061 	struct event_filter *filter = system->filter;
1062 
1063 	WARN_ON_ONCE(system_refcount(system) == 0);
1064 	if (system_refcount_dec(system))
1065 		return;
1066 
1067 	list_del(&system->list);
1068 
1069 	if (filter) {
1070 		kfree(filter->filter_string);
1071 		kfree(filter);
1072 	}
1073 	kfree_const(system->name);
1074 	kfree(system);
1075 }
1076 
1077 static void __get_system(struct event_subsystem *system)
1078 {
1079 	WARN_ON_ONCE(system_refcount(system) == 0);
1080 	system_refcount_inc(system);
1081 }
1082 
1083 static void __get_system_dir(struct trace_subsystem_dir *dir)
1084 {
1085 	WARN_ON_ONCE(dir->ref_count == 0);
1086 	dir->ref_count++;
1087 	__get_system(dir->subsystem);
1088 }
1089 
1090 static void __put_system_dir(struct trace_subsystem_dir *dir)
1091 {
1092 	WARN_ON_ONCE(dir->ref_count == 0);
1093 	/* If the subsystem is about to be freed, the dir must be too */
1094 	WARN_ON_ONCE(system_refcount(dir->subsystem) == 1 && dir->ref_count != 1);
1095 
1096 	__put_system(dir->subsystem);
1097 	if (!--dir->ref_count)
1098 		kfree(dir);
1099 }
1100 
1101 static void put_system(struct trace_subsystem_dir *dir)
1102 {
1103 	mutex_lock(&event_mutex);
1104 	__put_system_dir(dir);
1105 	mutex_unlock(&event_mutex);
1106 }
1107 
1108 static void remove_subsystem(struct trace_subsystem_dir *dir)
1109 {
1110 	if (!dir)
1111 		return;
1112 
1113 	if (!--dir->nr_events) {
1114 		eventfs_remove_dir(dir->ei);
1115 		list_del(&dir->list);
1116 		__put_system_dir(dir);
1117 	}
1118 }
1119 
1120 void event_file_get(struct trace_event_file *file)
1121 {
1122 	refcount_inc(&file->ref);
1123 }
1124 
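/*
 * Drop a reference to an event file. The file is freed only once its
 * refcount hits zero and remove_event_file_dir() has marked it with
 * EVENT_FILE_FL_FREED.
 */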
1125 void event_file_put(struct trace_event_file *file)
1126 {
1127 	if (WARN_ON_ONCE(!refcount_read(&file->ref))) {
1128 		if (file->flags & EVENT_FILE_FL_FREED)
1129 			kmem_cache_free(file_cachep, file);
1130 		return;
1131 	}
1132 
1133 	if (refcount_dec_and_test(&file->ref)) {
1134 		/* Count should only go to zero when it is freed */
1135 		if (WARN_ON_ONCE(!(file->flags & EVENT_FILE_FL_FREED)))
1136 			return;
1137 		kmem_cache_free(file_cachep, file);
1138 	}
1139 }
1140 
1141 static void remove_event_file_dir(struct trace_event_file *file)
1142 {
1143 	eventfs_remove_dir(file->ei);
1144 	list_del(&file->list);
1145 	remove_subsystem(file->system);
1146 	free_event_filter(file->filter);
1147 	file->flags |= EVENT_FILE_FL_FREED;
1148 	event_file_put(file);
1149 }
1150 
1151 /*
1152  * __ftrace_set_clr_event(tr, NULL, NULL, NULL, set) will set/unset all events.
1153  */
1154 static int
1155 __ftrace_set_clr_event_nolock(struct trace_array *tr, const char *match,
1156 			      const char *sub, const char *event, int set)
1157 {
1158 	struct trace_event_file *file;
1159 	struct trace_event_call *call;
1160 	const char *name;
1161 	int ret = -EINVAL;
1162 	int eret = 0;
1163 
1164 	list_for_each_entry(file, &tr->events, list) {
1165 
1166 		call = file->event_call;
1167 		name = trace_event_name(call);
1168 
1169 		if (!name || !call->class || !call->class->reg)
1170 			continue;
1171 
1172 		if (call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
1173 			continue;
1174 
1175 		if (match &&
1176 		    strcmp(match, name) != 0 &&
1177 		    strcmp(match, call->class->system) != 0)
1178 			continue;
1179 
1180 		if (sub && strcmp(sub, call->class->system) != 0)
1181 			continue;
1182 
1183 		if (event && strcmp(event, name) != 0)
1184 			continue;
1185 
1186 		ret = ftrace_event_enable_disable(file, set);
1187 
1188 		/*
1189 		 * Save the first error and return that. Some events
1190 		 * may still have been enabled, but let the user
1191 		 * know that something went wrong.
1192 		 */
1193 		if (ret && !eret)
1194 			eret = ret;
1195 
1196 		ret = eret;
1197 	}
1198 
1199 	return ret;
1200 }
1201 
1202 static int __ftrace_set_clr_event(struct trace_array *tr, const char *match,
1203 				  const char *sub, const char *event, int set)
1204 {
1205 	int ret;
1206 
1207 	mutex_lock(&event_mutex);
1208 	ret = __ftrace_set_clr_event_nolock(tr, match, sub, event, set);
1209 	mutex_unlock(&event_mutex);
1210 
1211 	return ret;
1212 }
1213 
1214 int ftrace_set_clr_event(struct trace_array *tr, char *buf, int set)
1215 {
1216 	char *event = NULL, *sub = NULL, *match;
1217 	int ret;
1218 
1219 	if (!tr)
1220 		return -ENOENT;
1221 	/*
1222 	 * The buf format can be <subsystem>:<event-name>
1223 	 *  *:<event-name> means any event by that name.
1224 	 *  :<event-name> is the same.
1225 	 *
1226 	 *  <subsystem>:* means all events in that subsystem
1227 	 *  <subsystem>: means the same.
1228 	 *
1229 	 *  <name> (no ':') means all events in a subsystem with
1230 	 *  the name <name> or any event that matches <name>
1231 	 */
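	/*
	 * For example, "sched:sched_switch" names one event,
	 * "sched:" or "sched:*" selects the whole sched subsystem,
	 * and a bare "sched_switch" matches by event or subsystem name.
	 */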
1232 
1233 	match = strsep(&buf, ":");
1234 	if (buf) {
1235 		sub = match;
1236 		event = buf;
1237 		match = NULL;
1238 
1239 		if (!strlen(sub) || strcmp(sub, "*") == 0)
1240 			sub = NULL;
1241 		if (!strlen(event) || strcmp(event, "*") == 0)
1242 			event = NULL;
1243 	}
1244 
1245 	ret = __ftrace_set_clr_event(tr, match, sub, event, set);
1246 
1247 	/* Put back the colon to allow this to be called again */
1248 	if (buf)
1249 		*(buf - 1) = ':';
1250 
1251 	return ret;
1252 }
1253 
1254 /**
1255  * trace_set_clr_event - enable or disable an event
1256  * @system: system name to match (NULL for any system)
1257  * @event: event name to match (NULL for all events, within system)
1258  * @set: 1 to enable, 0 to disable
1259  *
1260  * This is a way for other parts of the kernel to enable or disable
1261  * event recording.
1262  *
1263  * Returns 0 on success, -EINVAL if the parameters do not match any
1264  * registered events.
1265  */
1266 int trace_set_clr_event(const char *system, const char *event, int set)
1267 {
1268 	struct trace_array *tr = top_trace_array();
1269 
1270 	if (!tr)
1271 		return -ENODEV;
1272 
1273 	return __ftrace_set_clr_event(tr, NULL, system, event, set);
1274 }
1275 EXPORT_SYMBOL_GPL(trace_set_clr_event);
1276 
1277 /**
1278  * trace_array_set_clr_event - enable or disable an event for a trace array.
1279  * @tr: concerned trace array.
1280  * @system: system name to match (NULL for any system)
1281  * @event: event name to match (NULL for all events, within system)
1282  * @enable: true to enable, false to disable
1283  *
1284  * This is a way for other parts of the kernel to enable or disable
1285  * event recording.
1286  *
1287  * Returns 0 on success, -EINVAL if the parameters do not match any
1288  * registered events.
1289  */
1290 int trace_array_set_clr_event(struct trace_array *tr, const char *system,
1291 		const char *event, bool enable)
1292 {
1293 	int set;
1294 
1295 	if (!tr)
1296 		return -ENOENT;
1297 
1298 	set = (enable == true) ? 1 : 0;
1299 	return __ftrace_set_clr_event(tr, NULL, system, event, set);
1300 }
1301 EXPORT_SYMBOL_GPL(trace_array_set_clr_event);
1302 
1303 /* 128 should be much more than enough */
1304 #define EVENT_BUF_SIZE		127
1305 
1306 static ssize_t
1307 ftrace_event_write(struct file *file, const char __user *ubuf,
1308 		   size_t cnt, loff_t *ppos)
1309 {
1310 	struct trace_parser parser;
1311 	struct seq_file *m = file->private_data;
1312 	struct trace_array *tr = m->private;
1313 	ssize_t read, ret;
1314 
1315 	if (!cnt)
1316 		return 0;
1317 
1318 	ret = tracing_update_buffers(tr);
1319 	if (ret < 0)
1320 		return ret;
1321 
1322 	if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
1323 		return -ENOMEM;
1324 
1325 	read = trace_get_user(&parser, ubuf, cnt, ppos);
1326 
1327 	if (read >= 0 && trace_parser_loaded((&parser))) {
1328 		int set = 1;
1329 
1330 		if (*parser.buffer == '!')
1331 			set = 0;
1332 
1333 		ret = ftrace_set_clr_event(tr, parser.buffer + !set, set);
1334 		if (ret)
1335 			goto out_put;
1336 	}
1337 
1338 	ret = read;
1339 
1340  out_put:
1341 	trace_parser_put(&parser);
1342 
1343 	return ret;
1344 }
1345 
1346 static void *
1347 t_next(struct seq_file *m, void *v, loff_t *pos)
1348 {
1349 	struct trace_event_file *file = v;
1350 	struct trace_event_call *call;
1351 	struct trace_array *tr = m->private;
1352 
1353 	(*pos)++;
1354 
1355 	list_for_each_entry_continue(file, &tr->events, list) {
1356 		call = file->event_call;
1357 		/*
1358 		 * Events in the ftrace subsystem are for showing formats only.
1359 		 * They cannot be enabled or disabled via the event files.
1360 		 */
1361 		if (call->class && call->class->reg &&
1362 		    !(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE))
1363 			return file;
1364 	}
1365 
1366 	return NULL;
1367 }
1368 
1369 static void *t_start(struct seq_file *m, loff_t *pos)
1370 {
1371 	struct trace_event_file *file;
1372 	struct trace_array *tr = m->private;
1373 	loff_t l;
1374 
1375 	mutex_lock(&event_mutex);
1376 
1377 	file = list_entry(&tr->events, struct trace_event_file, list);
1378 	for (l = 0; l <= *pos; ) {
1379 		file = t_next(m, file, &l);
1380 		if (!file)
1381 			break;
1382 	}
1383 	return file;
1384 }
1385 
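/* Like t_next() above, but only walk event files that are currently enabled */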
1386 static void *
1387 s_next(struct seq_file *m, void *v, loff_t *pos)
1388 {
1389 	struct trace_event_file *file = v;
1390 	struct trace_array *tr = m->private;
1391 
1392 	(*pos)++;
1393 
1394 	list_for_each_entry_continue(file, &tr->events, list) {
1395 		if (file->flags & EVENT_FILE_FL_ENABLED)
1396 			return file;
1397 	}
1398 
1399 	return NULL;
1400 }
1401 
1402 static void *s_start(struct seq_file *m, loff_t *pos)
1403 {
1404 	struct trace_event_file *file;
1405 	struct trace_array *tr = m->private;
1406 	loff_t l;
1407 
1408 	mutex_lock(&event_mutex);
1409 
1410 	file = list_entry(&tr->events, struct trace_event_file, list);
1411 	for (l = 0; l <= *pos; ) {
1412 		file = s_next(m, file, &l);
1413 		if (!file)
1414 			break;
1415 	}
1416 	return file;
1417 }
1418 
1419 static int t_show(struct seq_file *m, void *v)
1420 {
1421 	struct trace_event_file *file = v;
1422 	struct trace_event_call *call = file->event_call;
1423 
1424 	if (strcmp(call->class->system, TRACE_SYSTEM) != 0)
1425 		seq_printf(m, "%s:", call->class->system);
1426 	seq_printf(m, "%s\n", trace_event_name(call));
1427 
1428 	return 0;
1429 }
1430 
1431 static void t_stop(struct seq_file *m, void *p)
1432 {
1433 	mutex_unlock(&event_mutex);
1434 }
1435 
1436 static void *
1437 __next(struct seq_file *m, void *v, loff_t *pos, int type)
1438 {
1439 	struct trace_array *tr = m->private;
1440 	struct trace_pid_list *pid_list;
1441 
1442 	if (type == TRACE_PIDS)
1443 		pid_list = rcu_dereference_sched(tr->filtered_pids);
1444 	else
1445 		pid_list = rcu_dereference_sched(tr->filtered_no_pids);
1446 
1447 	return trace_pid_next(pid_list, v, pos);
1448 }
1449 
1450 static void *
1451 p_next(struct seq_file *m, void *v, loff_t *pos)
1452 {
1453 	return __next(m, v, pos, TRACE_PIDS);
1454 }
1455 
1456 static void *
1457 np_next(struct seq_file *m, void *v, loff_t *pos)
1458 {
1459 	return __next(m, v, pos, TRACE_NO_PIDS);
1460 }
1461 
1462 static void *__start(struct seq_file *m, loff_t *pos, int type)
1463 	__acquires(RCU)
1464 {
1465 	struct trace_pid_list *pid_list;
1466 	struct trace_array *tr = m->private;
1467 
1468 	/*
1469 	 * Grab the mutex so that calls to p_next() see the same
1470 	 * tr->filtered_pids that p_start() saw.
1471 	 * If we just passed the tr->filtered_pids around, then RCU would
1472 	 * have been enough, but doing that makes things more complex.
1473 	 */
1474 	mutex_lock(&event_mutex);
1475 	rcu_read_lock_sched();
1476 
1477 	if (type == TRACE_PIDS)
1478 		pid_list = rcu_dereference_sched(tr->filtered_pids);
1479 	else
1480 		pid_list = rcu_dereference_sched(tr->filtered_no_pids);
1481 
1482 	if (!pid_list)
1483 		return NULL;
1484 
1485 	return trace_pid_start(pid_list, pos);
1486 }
1487 
1488 static void *p_start(struct seq_file *m, loff_t *pos)
1489 	__acquires(RCU)
1490 {
1491 	return __start(m, pos, TRACE_PIDS);
1492 }
1493 
1494 static void *np_start(struct seq_file *m, loff_t *pos)
1495 	__acquires(RCU)
1496 {
1497 	return __start(m, pos, TRACE_NO_PIDS);
1498 }
1499 
1500 static void p_stop(struct seq_file *m, void *p)
1501 	__releases(RCU)
1502 {
1503 	rcu_read_unlock_sched();
1504 	mutex_unlock(&event_mutex);
1505 }
1506 
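/*
 * The "enable" file reads back "0" or "1", with a '*' appended when the
 * event is soft disabled or in soft mode (so "0*" and "1*" are possible).
 */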
1507 static ssize_t
1508 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1509 		  loff_t *ppos)
1510 {
1511 	struct trace_event_file *file;
1512 	unsigned long flags;
1513 	char buf[4] = "0";
1514 
1515 	mutex_lock(&event_mutex);
1516 	file = event_file_file(filp);
1517 	if (likely(file))
1518 		flags = file->flags;
1519 	mutex_unlock(&event_mutex);
1520 
1521 	if (!file)
1522 		return -ENODEV;
1523 
1524 	if (flags & EVENT_FILE_FL_ENABLED &&
1525 	    !(flags & EVENT_FILE_FL_SOFT_DISABLED))
1526 		strcpy(buf, "1");
1527 
1528 	if (flags & EVENT_FILE_FL_SOFT_DISABLED ||
1529 	    flags & EVENT_FILE_FL_SOFT_MODE)
1530 		strcat(buf, "*");
1531 
1532 	strcat(buf, "\n");
1533 
1534 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, strlen(buf));
1535 }
1536 
1537 static ssize_t
1538 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1539 		   loff_t *ppos)
1540 {
1541 	struct trace_event_file *file;
1542 	unsigned long val;
1543 	int ret;
1544 
1545 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1546 	if (ret)
1547 		return ret;
1548 
1549 	guard(mutex)(&event_mutex);
1550 
1551 	switch (val) {
1552 	case 0:
1553 	case 1:
1554 		file = event_file_file(filp);
1555 		if (!file)
1556 			return -ENODEV;
1557 		ret = tracing_update_buffers(file->tr);
1558 		if (ret < 0)
1559 			return ret;
1560 		ret = ftrace_event_enable_disable(file, val);
1561 		if (ret < 0)
1562 			return ret;
1563 		break;
1564 
1565 	default:
1566 		return -EINVAL;
1567 	}
1568 
1569 	*ppos += cnt;
1570 
1571 	return cnt;
1572 }
1573 
1574 static ssize_t
1575 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
1576 		   loff_t *ppos)
1577 {
1578 	const char set_to_char[4] = { '?', '0', '1', 'X' };
1579 	struct trace_subsystem_dir *dir = filp->private_data;
1580 	struct event_subsystem *system = dir->subsystem;
1581 	struct trace_event_call *call;
1582 	struct trace_event_file *file;
1583 	struct trace_array *tr = dir->tr;
1584 	char buf[2];
1585 	int set = 0;
1586 	int ret;
1587 
1588 	mutex_lock(&event_mutex);
1589 	list_for_each_entry(file, &tr->events, list) {
1590 		call = file->event_call;
1591 		if ((call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
1592 		    !trace_event_name(call) || !call->class || !call->class->reg)
1593 			continue;
1594 
1595 		if (system && strcmp(call->class->system, system->name) != 0)
1596 			continue;
1597 
1598 		/*
1599 		 * We need to find out if all the events are set,
1600 		 * or if all events are cleared, or if we have
1601 		 * a mixture.
1602 		 */
1603 		set |= (1 << !!(file->flags & EVENT_FILE_FL_ENABLED));
1604 
1605 		/*
1606 		 * If we have a mixture, no need to look further.
1607 		 */
1608 		if (set == 3)
1609 			break;
1610 	}
1611 	mutex_unlock(&event_mutex);
1612 
1613 	buf[0] = set_to_char[set];
1614 	buf[1] = '\n';
1615 
1616 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
1617 
1618 	return ret;
1619 }
1620 
1621 static ssize_t
1622 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
1623 		    loff_t *ppos)
1624 {
1625 	struct trace_subsystem_dir *dir = filp->private_data;
1626 	struct event_subsystem *system = dir->subsystem;
1627 	const char *name = NULL;
1628 	unsigned long val;
1629 	ssize_t ret;
1630 
1631 	ret = kstrtoul_from_user(ubuf, cnt, 10, &val);
1632 	if (ret)
1633 		return ret;
1634 
1635 	ret = tracing_update_buffers(dir->tr);
1636 	if (ret < 0)
1637 		return ret;
1638 
1639 	if (val != 0 && val != 1)
1640 		return -EINVAL;
1641 
1642 	/*
1643 	 * Opening of "enable" adds a ref count to system,
1644 	 * so the name is safe to use.
1645 	 */
1646 	if (system)
1647 		name = system->name;
1648 
1649 	ret = __ftrace_set_clr_event(dir->tr, NULL, name, NULL, val);
1650 	if (ret)
1651 		goto out;
1652 
1653 	ret = cnt;
1654 
1655 out:
1656 	*ppos += cnt;
1657 
1658 	return ret;
1659 }
1660 
1661 enum {
1662 	FORMAT_HEADER		= 1,
1663 	FORMAT_FIELD_SEPERATOR	= 2,
1664 	FORMAT_PRINTFMT		= 3,
1665 };
1666 
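/*
 * The "format" seq_file walks FORMAT_HEADER, then the common fields,
 * FORMAT_FIELD_SEPERATOR, the event's own fields and finally
 * FORMAT_PRINTFMT. The field lists are walked via ->prev so the fields
 * come out in the order they were defined (trace_define_field() adds
 * each new field at the head of its list).
 */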
1667 static void *f_next(struct seq_file *m, void *v, loff_t *pos)
1668 {
1669 	struct trace_event_file *file = event_file_data(m->private);
1670 	struct trace_event_call *call = file->event_call;
1671 	struct list_head *common_head = &ftrace_common_fields;
1672 	struct list_head *head = trace_get_fields(call);
1673 	struct list_head *node = v;
1674 
1675 	(*pos)++;
1676 
1677 	switch ((unsigned long)v) {
1678 	case FORMAT_HEADER:
1679 		node = common_head;
1680 		break;
1681 
1682 	case FORMAT_FIELD_SEPERATOR:
1683 		node = head;
1684 		break;
1685 
1686 	case FORMAT_PRINTFMT:
1687 		/* all done */
1688 		return NULL;
1689 	}
1690 
1691 	node = node->prev;
1692 	if (node == common_head)
1693 		return (void *)FORMAT_FIELD_SEPERATOR;
1694 	else if (node == head)
1695 		return (void *)FORMAT_PRINTFMT;
1696 	else
1697 		return node;
1698 }
1699 
1700 static int f_show(struct seq_file *m, void *v)
1701 {
1702 	struct trace_event_file *file = event_file_data(m->private);
1703 	struct trace_event_call *call = file->event_call;
1704 	struct ftrace_event_field *field;
1705 	const char *array_descriptor;
1706 
1707 	switch ((unsigned long)v) {
1708 	case FORMAT_HEADER:
1709 		seq_printf(m, "name: %s\n", trace_event_name(call));
1710 		seq_printf(m, "ID: %d\n", call->event.type);
1711 		seq_puts(m, "format:\n");
1712 		return 0;
1713 
1714 	case FORMAT_FIELD_SEPERATOR:
1715 		seq_putc(m, '\n');
1716 		return 0;
1717 
1718 	case FORMAT_PRINTFMT:
1719 		seq_printf(m, "\nprint fmt: %s\n",
1720 			   call->print_fmt);
1721 		return 0;
1722 	}
1723 
1724 	field = list_entry(v, struct ftrace_event_field, link);
1725 	/*
1726 	 * Smartly shows the array type (except for dynamic arrays).
1727 	 * Normal:
1728 	 *	field:TYPE VAR
1729 	 * If TYPE := TYPE[LEN], it is shown:
1730 	 *	field:TYPE VAR[LEN]
1731 	 */
1732 	array_descriptor = strchr(field->type, '[');
1733 
1734 	if (str_has_prefix(field->type, "__data_loc"))
1735 		array_descriptor = NULL;
1736 
1737 	if (!array_descriptor)
1738 		seq_printf(m, "\tfield:%s %s;\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1739 			   field->type, field->name, field->offset,
1740 			   field->size, !!field->is_signed);
1741 	else if (field->len)
1742 		seq_printf(m, "\tfield:%.*s %s[%d];\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1743 			   (int)(array_descriptor - field->type),
1744 			   field->type, field->name,
1745 			   field->len, field->offset,
1746 			   field->size, !!field->is_signed);
1747 	else
1748 		seq_printf(m, "\tfield:%.*s %s[];\toffset:%u;\tsize:%u;\tsigned:%d;\n",
1749 				(int)(array_descriptor - field->type),
1750 				field->type, field->name,
1751 				field->offset, field->size, !!field->is_signed);
1752 
1753 	return 0;
1754 }
1755 
1756 static void *f_start(struct seq_file *m, loff_t *pos)
1757 {
1758 	struct trace_event_file *file;
1759 	void *p = (void *)FORMAT_HEADER;
1760 	loff_t l = 0;
1761 
1762 	/* ->stop() is called even if ->start() fails */
1763 	mutex_lock(&event_mutex);
1764 	file = event_file_file(m->private);
1765 	if (!file)
1766 		return ERR_PTR(-ENODEV);
1767 
1768 	while (l < *pos && p)
1769 		p = f_next(m, p, &l);
1770 
1771 	return p;
1772 }
1773 
1774 static void f_stop(struct seq_file *m, void *p)
1775 {
1776 	mutex_unlock(&event_mutex);
1777 }
1778 
1779 static const struct seq_operations trace_format_seq_ops = {
1780 	.start		= f_start,
1781 	.next		= f_next,
1782 	.stop		= f_stop,
1783 	.show		= f_show,
1784 };
1785 
1786 static int trace_format_open(struct inode *inode, struct file *file)
1787 {
1788 	struct seq_file *m;
1789 	int ret;
1790 
1791 	/* Do we want to hide event format files on tracefs lockdown? */
1792 
1793 	ret = seq_open(file, &trace_format_seq_ops);
1794 	if (ret < 0)
1795 		return ret;
1796 
1797 	m = file->private_data;
1798 	m->private = file;
1799 
1800 	return 0;
1801 }
1802 
1803 #ifdef CONFIG_PERF_EVENTS
1804 static ssize_t
1805 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
1806 {
1807 	int id = (long)event_file_data(filp);
1808 	char buf[32];
1809 	int len;
1810 
1811 	if (unlikely(!id))
1812 		return -ENODEV;
1813 
1814 	len = sprintf(buf, "%d\n", id);
1815 
1816 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, len);
1817 }
1818 #endif
1819 
1820 static ssize_t
1821 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1822 		  loff_t *ppos)
1823 {
1824 	struct trace_event_file *file;
1825 	struct trace_seq *s;
1826 	int r = -ENODEV;
1827 
1828 	if (*ppos)
1829 		return 0;
1830 
1831 	s = kmalloc(sizeof(*s), GFP_KERNEL);
1832 
1833 	if (!s)
1834 		return -ENOMEM;
1835 
1836 	trace_seq_init(s);
1837 
1838 	mutex_lock(&event_mutex);
1839 	file = event_file_file(filp);
1840 	if (file)
1841 		print_event_filter(file, s);
1842 	mutex_unlock(&event_mutex);
1843 
1844 	if (file)
1845 		r = simple_read_from_buffer(ubuf, cnt, ppos,
1846 					    s->buffer, trace_seq_used(s));
1847 
1848 	kfree(s);
1849 
1850 	return r;
1851 }
1852 
1853 static ssize_t
1854 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
1855 		   loff_t *ppos)
1856 {
1857 	struct trace_event_file *file;
1858 	char *buf;
1859 	int err = -ENODEV;
1860 
1861 	if (cnt >= PAGE_SIZE)
1862 		return -EINVAL;
1863 
1864 	buf = memdup_user_nul(ubuf, cnt);
1865 	if (IS_ERR(buf))
1866 		return PTR_ERR(buf);
1867 
1868 	mutex_lock(&event_mutex);
1869 	file = event_file_file(filp);
1870 	if (file) {
1871 		if (file->flags & EVENT_FILE_FL_FREED)
1872 			err = -ENODEV;
1873 		else
1874 			err = apply_event_filter(file, buf);
1875 	}
1876 	mutex_unlock(&event_mutex);
1877 
1878 	kfree(buf);
1879 	if (err < 0)
1880 		return err;
1881 
1882 	*ppos += cnt;
1883 
1884 	return cnt;
1885 }
1886 
1887 static LIST_HEAD(event_subsystems);
1888 
1889 static int subsystem_open(struct inode *inode, struct file *filp)
1890 {
1891 	struct trace_subsystem_dir *dir = NULL, *iter_dir;
1892 	struct trace_array *tr = NULL, *iter_tr;
1893 	struct event_subsystem *system = NULL;
1894 	int ret;
1895 
1896 	if (tracing_is_disabled())
1897 		return -ENODEV;
1898 
1899 	/* Make sure the system still exists */
1900 	mutex_lock(&event_mutex);
1901 	mutex_lock(&trace_types_lock);
1902 	list_for_each_entry(iter_tr, &ftrace_trace_arrays, list) {
1903 		list_for_each_entry(iter_dir, &iter_tr->systems, list) {
1904 			if (iter_dir == inode->i_private) {
1905 				/* Don't open systems with no events */
1906 				tr = iter_tr;
1907 				dir = iter_dir;
1908 				if (dir->nr_events) {
1909 					__get_system_dir(dir);
1910 					system = dir->subsystem;
1911 				}
1912 				goto exit_loop;
1913 			}
1914 		}
1915 	}
1916  exit_loop:
1917 	mutex_unlock(&trace_types_lock);
1918 	mutex_unlock(&event_mutex);
1919 
1920 	if (!system)
1921 		return -ENODEV;
1922 
1923 	/* Still need to increment the ref count of the trace array */
1924 	if (trace_array_get(tr) < 0) {
1925 		put_system(dir);
1926 		return -ENODEV;
1927 	}
1928 
1929 	ret = tracing_open_generic(inode, filp);
1930 	if (ret < 0) {
1931 		trace_array_put(tr);
1932 		put_system(dir);
1933 	}
1934 
1935 	return ret;
1936 }
1937 
1938 static int system_tr_open(struct inode *inode, struct file *filp)
1939 {
1940 	struct trace_subsystem_dir *dir;
1941 	struct trace_array *tr = inode->i_private;
1942 	int ret;
1943 
1944 	/* Make a temporary dir that has no system but points to tr */
1945 	dir = kzalloc(sizeof(*dir), GFP_KERNEL);
1946 	if (!dir)
1947 		return -ENOMEM;
1948 
1949 	ret = tracing_open_generic_tr(inode, filp);
1950 	if (ret < 0) {
1951 		kfree(dir);
1952 		return ret;
1953 	}
1954 	dir->tr = tr;
1955 	filp->private_data = dir;
1956 
1957 	return 0;
1958 }
1959 
1960 static int subsystem_release(struct inode *inode, struct file *file)
1961 {
1962 	struct trace_subsystem_dir *dir = file->private_data;
1963 
1964 	trace_array_put(dir->tr);
1965 
1966 	/*
1967 	 * If dir->subsystem is NULL, then this is a temporary
1968 	 * descriptor that was made for a trace_array to enable
1969 	 * all subsystems.
1970 	 */
1971 	if (dir->subsystem)
1972 		put_system(dir);
1973 	else
1974 		kfree(dir);
1975 
1976 	return 0;
1977 }
1978 
1979 static ssize_t
1980 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
1981 		      loff_t *ppos)
1982 {
1983 	struct trace_subsystem_dir *dir = filp->private_data;
1984 	struct event_subsystem *system = dir->subsystem;
1985 	struct trace_seq *s;
1986 	int r;
1987 
1988 	if (*ppos)
1989 		return 0;
1990 
1991 	s = kmalloc(sizeof(*s), GFP_KERNEL);
1992 	if (!s)
1993 		return -ENOMEM;
1994 
1995 	trace_seq_init(s);
1996 
1997 	print_subsystem_event_filter(system, s);
1998 	r = simple_read_from_buffer(ubuf, cnt, ppos,
1999 				    s->buffer, trace_seq_used(s));
2000 
2001 	kfree(s);
2002 
2003 	return r;
2004 }
2005 
2006 static ssize_t
2007 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
2008 		       loff_t *ppos)
2009 {
2010 	struct trace_subsystem_dir *dir = filp->private_data;
2011 	char *buf;
2012 	int err;
2013 
2014 	if (cnt >= PAGE_SIZE)
2015 		return -EINVAL;
2016 
2017 	buf = memdup_user_nul(ubuf, cnt);
2018 	if (IS_ERR(buf))
2019 		return PTR_ERR(buf);
2020 
2021 	err = apply_subsystem_event_filter(dir, buf);
2022 	kfree(buf);
2023 	if (err < 0)
2024 		return err;
2025 
2026 	*ppos += cnt;
2027 
2028 	return cnt;
2029 }
2030 
2031 static ssize_t
2032 show_header_page_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
2033 {
2034 	struct trace_array *tr = filp->private_data;
2035 	struct trace_seq *s;
2036 	int r;
2037 
2038 	if (*ppos)
2039 		return 0;
2040 
2041 	s = kmalloc(sizeof(*s), GFP_KERNEL);
2042 	if (!s)
2043 		return -ENOMEM;
2044 
2045 	trace_seq_init(s);
2046 
2047 	ring_buffer_print_page_header(tr->array_buffer.buffer, s);
2048 	r = simple_read_from_buffer(ubuf, cnt, ppos,
2049 				    s->buffer, trace_seq_used(s));
2050 
2051 	kfree(s);
2052 
2053 	return r;
2054 }
2055 
2056 static ssize_t
2057 show_header_event_file(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
2058 {
2059 	struct trace_seq *s;
2060 	int r;
2061 
2062 	if (*ppos)
2063 		return 0;
2064 
2065 	s = kmalloc(sizeof(*s), GFP_KERNEL);
2066 	if (!s)
2067 		return -ENOMEM;
2068 
2069 	trace_seq_init(s);
2070 
2071 	ring_buffer_print_entry_header(s);
2072 	r = simple_read_from_buffer(ubuf, cnt, ppos,
2073 				    s->buffer, trace_seq_used(s));
2074 
2075 	kfree(s);
2076 
2077 	return r;
2078 }
2079 
2080 static void ignore_task_cpu(void *data)
2081 {
2082 	struct trace_array *tr = data;
2083 	struct trace_pid_list *pid_list;
2084 	struct trace_pid_list *no_pid_list;
2085 
2086 	/*
2087 	 * This function is called by on_each_cpu() while the
2088 	 * event_mutex is held.
2089 	 */
2090 	pid_list = rcu_dereference_protected(tr->filtered_pids,
2091 					     mutex_is_locked(&event_mutex));
2092 	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
2093 					     mutex_is_locked(&event_mutex));
2094 
2095 	this_cpu_write(tr->array_buffer.data->ignore_pid,
2096 		       trace_ignore_this_task(pid_list, no_pid_list, current));
2097 }
2098 
2099 static void register_pid_events(struct trace_array *tr)
2100 {
2101 	/*
2102 	 * Register a probe that is called before all other probes
2103 	 * to set ignore_pid if next or prev do not match.
2104 	 * Register a probe that is called after all other probes
2105 	 * to only keep ignore_pid set if next pid matches.
2106 	 */
2107 	register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_pre,
2108 					 tr, INT_MAX);
2109 	register_trace_prio_sched_switch(event_filter_pid_sched_switch_probe_post,
2110 					 tr, 0);
2111 
2112 	register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_pre,
2113 					 tr, INT_MAX);
2114 	register_trace_prio_sched_wakeup(event_filter_pid_sched_wakeup_probe_post,
2115 					 tr, 0);
2116 
2117 	register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_pre,
2118 					     tr, INT_MAX);
2119 	register_trace_prio_sched_wakeup_new(event_filter_pid_sched_wakeup_probe_post,
2120 					     tr, 0);
2121 
2122 	register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_pre,
2123 					 tr, INT_MAX);
2124 	register_trace_prio_sched_waking(event_filter_pid_sched_wakeup_probe_post,
2125 					 tr, 0);
2126 }
2127 
2128 static ssize_t
2129 event_pid_write(struct file *filp, const char __user *ubuf,
2130 		size_t cnt, loff_t *ppos, int type)
2131 {
2132 	struct seq_file *m = filp->private_data;
2133 	struct trace_array *tr = m->private;
2134 	struct trace_pid_list *filtered_pids = NULL;
2135 	struct trace_pid_list *other_pids = NULL;
2136 	struct trace_pid_list *pid_list;
2137 	struct trace_event_file *file;
2138 	ssize_t ret;
2139 
2140 	if (!cnt)
2141 		return 0;
2142 
2143 	ret = tracing_update_buffers(tr);
2144 	if (ret < 0)
2145 		return ret;
2146 
2147 	guard(mutex)(&event_mutex);
2148 
2149 	if (type == TRACE_PIDS) {
2150 		filtered_pids = rcu_dereference_protected(tr->filtered_pids,
2151 							  lockdep_is_held(&event_mutex));
2152 		other_pids = rcu_dereference_protected(tr->filtered_no_pids,
2153 							  lockdep_is_held(&event_mutex));
2154 	} else {
2155 		filtered_pids = rcu_dereference_protected(tr->filtered_no_pids,
2156 							  lockdep_is_held(&event_mutex));
2157 		other_pids = rcu_dereference_protected(tr->filtered_pids,
2158 							  lockdep_is_held(&event_mutex));
2159 	}
2160 
2161 	ret = trace_pid_write(filtered_pids, &pid_list, ubuf, cnt);
2162 	if (ret < 0)
2163 		return ret;
2164 
2165 	if (type == TRACE_PIDS)
2166 		rcu_assign_pointer(tr->filtered_pids, pid_list);
2167 	else
2168 		rcu_assign_pointer(tr->filtered_no_pids, pid_list);
2169 
2170 	list_for_each_entry(file, &tr->events, list) {
2171 		set_bit(EVENT_FILE_FL_PID_FILTER_BIT, &file->flags);
2172 	}
2173 
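	/*
	 * If a pid list of this type already existed, wait for all current
	 * readers to be done with it before freeing it. If this is the very
	 * first pid list (and the other type has none), the sched tracepoint
	 * probes must be registered so that ignore_pid gets updated on task
	 * switches and wakeups.
	 */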
2174 	if (filtered_pids) {
2175 		tracepoint_synchronize_unregister();
2176 		trace_pid_list_free(filtered_pids);
2177 	} else if (pid_list && !other_pids) {
2178 		register_pid_events(tr);
2179 	}
2180 
2181 	/*
2182 	 * Ignoring of pids is done at task switch. But we have to
2183 	 * check for those tasks that are currently running.
2184 	 * Always do this in case a pid was appended or removed.
2185 	 */
2186 	on_each_cpu(ignore_task_cpu, tr, 1);
2187 
2188 	*ppos += ret;
2189 
2190 	return ret;
2191 }
2192 
2193 static ssize_t
2194 ftrace_event_pid_write(struct file *filp, const char __user *ubuf,
2195 		       size_t cnt, loff_t *ppos)
2196 {
2197 	return event_pid_write(filp, ubuf, cnt, ppos, TRACE_PIDS);
2198 }
2199 
2200 static ssize_t
2201 ftrace_event_npid_write(struct file *filp, const char __user *ubuf,
2202 			size_t cnt, loff_t *ppos)
2203 {
2204 	return event_pid_write(filp, ubuf, cnt, ppos, TRACE_NO_PIDS);
2205 }
2206 
2207 static int ftrace_event_avail_open(struct inode *inode, struct file *file);
2208 static int ftrace_event_set_open(struct inode *inode, struct file *file);
2209 static int ftrace_event_set_pid_open(struct inode *inode, struct file *file);
2210 static int ftrace_event_set_npid_open(struct inode *inode, struct file *file);
2211 static int ftrace_event_release(struct inode *inode, struct file *file);
2212 
2213 static const struct seq_operations show_event_seq_ops = {
2214 	.start = t_start,
2215 	.next = t_next,
2216 	.show = t_show,
2217 	.stop = t_stop,
2218 };
2219 
2220 static const struct seq_operations show_set_event_seq_ops = {
2221 	.start = s_start,
2222 	.next = s_next,
2223 	.show = t_show,
2224 	.stop = t_stop,
2225 };
2226 
2227 static const struct seq_operations show_set_pid_seq_ops = {
2228 	.start = p_start,
2229 	.next = p_next,
2230 	.show = trace_pid_show,
2231 	.stop = p_stop,
2232 };
2233 
2234 static const struct seq_operations show_set_no_pid_seq_ops = {
2235 	.start = np_start,
2236 	.next = np_next,
2237 	.show = trace_pid_show,
2238 	.stop = p_stop,
2239 };
2240 
2241 static const struct file_operations ftrace_avail_fops = {
2242 	.open = ftrace_event_avail_open,
2243 	.read = seq_read,
2244 	.llseek = seq_lseek,
2245 	.release = seq_release,
2246 };
2247 
2248 static const struct file_operations ftrace_set_event_fops = {
2249 	.open = ftrace_event_set_open,
2250 	.read = seq_read,
2251 	.write = ftrace_event_write,
2252 	.llseek = seq_lseek,
2253 	.release = ftrace_event_release,
2254 };
2255 
2256 static const struct file_operations ftrace_set_event_pid_fops = {
2257 	.open = ftrace_event_set_pid_open,
2258 	.read = seq_read,
2259 	.write = ftrace_event_pid_write,
2260 	.llseek = seq_lseek,
2261 	.release = ftrace_event_release,
2262 };
2263 
2264 static const struct file_operations ftrace_set_event_notrace_pid_fops = {
2265 	.open = ftrace_event_set_npid_open,
2266 	.read = seq_read,
2267 	.write = ftrace_event_npid_write,
2268 	.llseek = seq_lseek,
2269 	.release = ftrace_event_release,
2270 };
2271 
2272 static const struct file_operations ftrace_enable_fops = {
2273 	.open = tracing_open_file_tr,
2274 	.read = event_enable_read,
2275 	.write = event_enable_write,
2276 	.release = tracing_release_file_tr,
2277 	.llseek = default_llseek,
2278 };
2279 
2280 static const struct file_operations ftrace_event_format_fops = {
2281 	.open = trace_format_open,
2282 	.read = seq_read,
2283 	.llseek = seq_lseek,
2284 	.release = seq_release,
2285 };
2286 
2287 #ifdef CONFIG_PERF_EVENTS
2288 static const struct file_operations ftrace_event_id_fops = {
2289 	.read = event_id_read,
2290 	.llseek = default_llseek,
2291 };
2292 #endif
2293 
2294 static const struct file_operations ftrace_event_filter_fops = {
2295 	.open = tracing_open_file_tr,
2296 	.read = event_filter_read,
2297 	.write = event_filter_write,
2298 	.release = tracing_release_file_tr,
2299 	.llseek = default_llseek,
2300 };
2301 
2302 static const struct file_operations ftrace_subsystem_filter_fops = {
2303 	.open = subsystem_open,
2304 	.read = subsystem_filter_read,
2305 	.write = subsystem_filter_write,
2306 	.llseek = default_llseek,
2307 	.release = subsystem_release,
2308 };
2309 
2310 static const struct file_operations ftrace_system_enable_fops = {
2311 	.open = subsystem_open,
2312 	.read = system_enable_read,
2313 	.write = system_enable_write,
2314 	.llseek = default_llseek,
2315 	.release = subsystem_release,
2316 };
2317 
2318 static const struct file_operations ftrace_tr_enable_fops = {
2319 	.open = system_tr_open,
2320 	.read = system_enable_read,
2321 	.write = system_enable_write,
2322 	.llseek = default_llseek,
2323 	.release = subsystem_release,
2324 };
2325 
2326 static const struct file_operations ftrace_show_header_page_fops = {
2327 	.open = tracing_open_generic_tr,
2328 	.read = show_header_page_file,
2329 	.llseek = default_llseek,
2330 	.release = tracing_release_generic_tr,
2331 };
2332 
2333 static const struct file_operations ftrace_show_header_event_fops = {
2334 	.open = tracing_open_generic_tr,
2335 	.read = show_header_event_file,
2336 	.llseek = default_llseek,
2337 	.release = tracing_release_generic_tr,
2338 };
2339 
2340 static int
2341 ftrace_event_open(struct inode *inode, struct file *file,
2342 		  const struct seq_operations *seq_ops)
2343 {
2344 	struct seq_file *m;
2345 	int ret;
2346 
2347 	ret = security_locked_down(LOCKDOWN_TRACEFS);
2348 	if (ret)
2349 		return ret;
2350 
2351 	ret = seq_open(file, seq_ops);
2352 	if (ret < 0)
2353 		return ret;
2354 	m = file->private_data;
2355 	/* copy tr over to seq ops */
2356 	m->private = inode->i_private;
2357 
2358 	return ret;
2359 }
2360 
2361 static int ftrace_event_release(struct inode *inode, struct file *file)
2362 {
2363 	struct trace_array *tr = inode->i_private;
2364 
2365 	trace_array_put(tr);
2366 
2367 	return seq_release(inode, file);
2368 }
2369 
2370 static int
2371 ftrace_event_avail_open(struct inode *inode, struct file *file)
2372 {
2373 	const struct seq_operations *seq_ops = &show_event_seq_ops;
2374 
2375 	/* Checks for tracefs lockdown */
2376 	return ftrace_event_open(inode, file, seq_ops);
2377 }
2378 
2379 static int
2380 ftrace_event_set_open(struct inode *inode, struct file *file)
2381 {
2382 	const struct seq_operations *seq_ops = &show_set_event_seq_ops;
2383 	struct trace_array *tr = inode->i_private;
2384 	int ret;
2385 
2386 	ret = tracing_check_open_get_tr(tr);
2387 	if (ret)
2388 		return ret;
2389 
2390 	if ((file->f_mode & FMODE_WRITE) &&
2391 	    (file->f_flags & O_TRUNC))
2392 		ftrace_clear_events(tr);
2393 
2394 	ret = ftrace_event_open(inode, file, seq_ops);
2395 	if (ret < 0)
2396 		trace_array_put(tr);
2397 	return ret;
2398 }
2399 
2400 static int
2401 ftrace_event_set_pid_open(struct inode *inode, struct file *file)
2402 {
2403 	const struct seq_operations *seq_ops = &show_set_pid_seq_ops;
2404 	struct trace_array *tr = inode->i_private;
2405 	int ret;
2406 
2407 	ret = tracing_check_open_get_tr(tr);
2408 	if (ret)
2409 		return ret;
2410 
2411 	if ((file->f_mode & FMODE_WRITE) &&
2412 	    (file->f_flags & O_TRUNC))
2413 		ftrace_clear_event_pids(tr, TRACE_PIDS);
2414 
2415 	ret = ftrace_event_open(inode, file, seq_ops);
2416 	if (ret < 0)
2417 		trace_array_put(tr);
2418 	return ret;
2419 }
2420 
2421 static int
2422 ftrace_event_set_npid_open(struct inode *inode, struct file *file)
2423 {
2424 	const struct seq_operations *seq_ops = &show_set_no_pid_seq_ops;
2425 	struct trace_array *tr = inode->i_private;
2426 	int ret;
2427 
2428 	ret = tracing_check_open_get_tr(tr);
2429 	if (ret)
2430 		return ret;
2431 
2432 	if ((file->f_mode & FMODE_WRITE) &&
2433 	    (file->f_flags & O_TRUNC))
2434 		ftrace_clear_event_pids(tr, TRACE_NO_PIDS);
2435 
2436 	ret = ftrace_event_open(inode, file, seq_ops);
2437 	if (ret < 0)
2438 		trace_array_put(tr);
2439 	return ret;
2440 }
2441 
2442 static struct event_subsystem *
2443 create_new_subsystem(const char *name)
2444 {
2445 	struct event_subsystem *system;
2446 
2447 	/* need to create new entry */
2448 	system = kmalloc(sizeof(*system), GFP_KERNEL);
2449 	if (!system)
2450 		return NULL;
2451 
2452 	system->ref_count = 1;
2453 
2454 	/* Only allocate if dynamic (kprobes and modules) */
2455 	system->name = kstrdup_const(name, GFP_KERNEL);
2456 	if (!system->name)
2457 		goto out_free;
2458 
2459 	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
2460 	if (!system->filter)
2461 		goto out_free;
2462 
2463 	list_add(&system->list, &event_subsystems);
2464 
2465 	return system;
2466 
2467  out_free:
2468 	kfree_const(system->name);
2469 	kfree(system);
2470 	return NULL;
2471 }
2472 
2473 static int system_callback(const char *name, umode_t *mode, void **data,
2474 		    const struct file_operations **fops)
2475 {
2476 	if (strcmp(name, "filter") == 0)
2477 		*fops = &ftrace_subsystem_filter_fops;
2478 
2479 	else if (strcmp(name, "enable") == 0)
2480 		*fops = &ftrace_system_enable_fops;
2481 
2482 	else
2483 		return 0;
2484 
2485 	*mode = TRACE_MODE_WRITE;
2486 	return 1;
2487 }
2488 
2489 static struct eventfs_inode *
2490 event_subsystem_dir(struct trace_array *tr, const char *name,
2491 		    struct trace_event_file *file, struct eventfs_inode *parent)
2492 {
2493 	struct event_subsystem *system, *iter;
2494 	struct trace_subsystem_dir *dir;
2495 	struct eventfs_inode *ei;
2496 	int nr_entries;
2497 	static struct eventfs_entry system_entries[] = {
2498 		{
2499 			.name		= "filter",
2500 			.callback	= system_callback,
2501 		},
2502 		{
2503 			.name		= "enable",
2504 			.callback	= system_callback,
2505 		}
2506 	};
2507 
2508 	/* First see if this directory was already created */
2509 	list_for_each_entry(dir, &tr->systems, list) {
2510 		system = dir->subsystem;
2511 		if (strcmp(system->name, name) == 0) {
2512 			dir->nr_events++;
2513 			file->system = dir;
2514 			return dir->ei;
2515 		}
2516 	}
2517 
2518 	/* Now see if the system itself exists. */
2519 	system = NULL;
2520 	list_for_each_entry(iter, &event_subsystems, list) {
2521 		if (strcmp(iter->name, name) == 0) {
2522 			system = iter;
2523 			break;
2524 		}
2525 	}
2526 
2527 	dir = kmalloc(sizeof(*dir), GFP_KERNEL);
2528 	if (!dir)
2529 		goto out_fail;
2530 
2531 	if (!system) {
2532 		system = create_new_subsystem(name);
2533 		if (!system)
2534 			goto out_free;
2535 	} else
2536 		__get_system(system);
2537 
2538 	/* ftrace only has directories, no files */
2539 	if (strcmp(name, "ftrace") == 0)
2540 		nr_entries = 0;
2541 	else
2542 		nr_entries = ARRAY_SIZE(system_entries);
2543 
2544 	ei = eventfs_create_dir(name, parent, system_entries, nr_entries, dir);
2545 	if (IS_ERR(ei)) {
2546 		pr_warn("Failed to create system directory %s\n", name);
2547 		__put_system(system);
2548 		goto out_free;
2549 	}
2550 
2551 	dir->ei = ei;
2552 	dir->tr = tr;
2553 	dir->ref_count = 1;
2554 	dir->nr_events = 1;
2555 	dir->subsystem = system;
2556 	file->system = dir;
2557 
2558 	list_add(&dir->list, &tr->systems);
2559 
2560 	return dir->ei;
2561 
2562  out_free:
2563 	kfree(dir);
2564  out_fail:
2565 	/* Only print this message if the failure was a memory allocation */
2566 	if (!dir || !system)
2567 		pr_warn("No memory to create event subsystem %s\n", name);
2568 	return NULL;
2569 }
2570 
2571 static int
2572 event_define_fields(struct trace_event_call *call)
2573 {
2574 	struct list_head *head;
2575 	int ret = 0;
2576 
2577 	/*
2578 	 * Other events may have the same class. Only update
2579 	 * the fields if they are not already defined.
2580 	 */
2581 	head = trace_get_fields(call);
2582 	if (list_empty(head)) {
2583 		struct trace_event_fields *field = call->class->fields_array;
2584 		unsigned int offset = sizeof(struct trace_entry);
2585 
2586 		for (; field->type; field++) {
2587 			if (field->type == TRACE_FUNCTION_TYPE) {
2588 				field->define_fields(call);
2589 				break;
2590 			}
2591 
2592 			offset = ALIGN(offset, field->align);
2593 			ret = trace_define_field_ext(call, field->type, field->name,
2594 						 offset, field->size,
2595 						 field->is_signed, field->filter_type,
2596 						 field->len, field->needs_test);
2597 			if (WARN_ON_ONCE(ret)) {
2598 				pr_err("error code is %d\n", ret);
2599 				break;
2600 			}
2601 
2602 			offset += field->size;
2603 		}
2604 	}
2605 
2606 	return ret;
2607 }
2608 
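/*
 * eventfs callback for the per-event control files ("enable", "filter",
 * "format", ...): when one of them is looked up, resolve it to the matching
 * file_operations and mode, or return 0 if the file does not apply to this
 * event.
 */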
2609 static int event_callback(const char *name, umode_t *mode, void **data,
2610 			  const struct file_operations **fops)
2611 {
2612 	struct trace_event_file *file = *data;
2613 	struct trace_event_call *call = file->event_call;
2614 
2615 	if (strcmp(name, "format") == 0) {
2616 		*mode = TRACE_MODE_READ;
2617 		*fops = &ftrace_event_format_fops;
2618 		return 1;
2619 	}
2620 
2621 	/*
2622 	 * Only event directories that can be enabled should have
2623 	 * triggers or filters, with the exception of the "print"
2624 	 * event that can have a "trigger" file.
2625 	 */
2626 	if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)) {
2627 		if (call->class->reg && strcmp(name, "enable") == 0) {
2628 			*mode = TRACE_MODE_WRITE;
2629 			*fops = &ftrace_enable_fops;
2630 			return 1;
2631 		}
2632 
2633 		if (strcmp(name, "filter") == 0) {
2634 			*mode = TRACE_MODE_WRITE;
2635 			*fops = &ftrace_event_filter_fops;
2636 			return 1;
2637 		}
2638 	}
2639 
2640 	if (!(call->flags & TRACE_EVENT_FL_IGNORE_ENABLE) ||
2641 	    strcmp(trace_event_name(call), "print") == 0) {
2642 		if (strcmp(name, "trigger") == 0) {
2643 			*mode = TRACE_MODE_WRITE;
2644 			*fops = &event_trigger_fops;
2645 			return 1;
2646 		}
2647 	}
2648 
2649 #ifdef CONFIG_PERF_EVENTS
2650 	if (call->event.type && call->class->reg &&
2651 	    strcmp(name, "id") == 0) {
2652 		*mode = TRACE_MODE_READ;
2653 		*data = (void *)(long)call->event.type;
2654 		*fops = &ftrace_event_id_fops;
2655 		return 1;
2656 	}
2657 #endif
2658 
2659 #ifdef CONFIG_HIST_TRIGGERS
2660 	if (strcmp(name, "hist") == 0) {
2661 		*mode = TRACE_MODE_READ;
2662 		*fops = &event_hist_fops;
2663 		return 1;
2664 	}
2665 #endif
2666 #ifdef CONFIG_HIST_TRIGGERS_DEBUG
2667 	if (strcmp(name, "hist_debug") == 0) {
2668 		*mode = TRACE_MODE_READ;
2669 		*fops = &event_hist_debug_fops;
2670 		return 1;
2671 	}
2672 #endif
2673 #ifdef CONFIG_TRACE_EVENT_INJECT
2674 	if (call->event.type && call->class->reg &&
2675 	    strcmp(name, "inject") == 0) {
2676 		*mode = 0200;
2677 		*fops = &event_inject_fops;
2678 		return 1;
2679 	}
2680 #endif
2681 	return 0;
2682 }
2683 
2684 /* The file's ref count is incremented on creation, and freeing the "enable" file decrements it */
2685 static void event_release(const char *name, void *data)
2686 {
2687 	struct trace_event_file *file = data;
2688 
2689 	event_file_put(file);
2690 }
2691 
2692 static int
2693 event_create_dir(struct eventfs_inode *parent, struct trace_event_file *file)
2694 {
2695 	struct trace_event_call *call = file->event_call;
2696 	struct trace_array *tr = file->tr;
2697 	struct eventfs_inode *e_events;
2698 	struct eventfs_inode *ei;
2699 	const char *name;
2700 	int nr_entries;
2701 	int ret;
2702 	static struct eventfs_entry event_entries[] = {
2703 		{
2704 			.name		= "enable",
2705 			.callback	= event_callback,
2706 			.release	= event_release,
2707 		},
2708 		{
2709 			.name		= "filter",
2710 			.callback	= event_callback,
2711 		},
2712 		{
2713 			.name		= "trigger",
2714 			.callback	= event_callback,
2715 		},
2716 		{
2717 			.name		= "format",
2718 			.callback	= event_callback,
2719 		},
2720 #ifdef CONFIG_PERF_EVENTS
2721 		{
2722 			.name		= "id",
2723 			.callback	= event_callback,
2724 		},
2725 #endif
2726 #ifdef CONFIG_HIST_TRIGGERS
2727 		{
2728 			.name		= "hist",
2729 			.callback	= event_callback,
2730 		},
2731 #endif
2732 #ifdef CONFIG_HIST_TRIGGERS_DEBUG
2733 		{
2734 			.name		= "hist_debug",
2735 			.callback	= event_callback,
2736 		},
2737 #endif
2738 #ifdef CONFIG_TRACE_EVENT_INJECT
2739 		{
2740 			.name		= "inject",
2741 			.callback	= event_callback,
2742 		},
2743 #endif
2744 	};
2745 
2746 	/*
2747 	 * If the trace point header did not define TRACE_SYSTEM
2748 	 * then the system would be called "TRACE_SYSTEM". This should
2749 	 * never happen.
2750 	 */
2751 	if (WARN_ON_ONCE(strcmp(call->class->system, TRACE_SYSTEM) == 0))
2752 		return -ENODEV;
2753 
2754 	e_events = event_subsystem_dir(tr, call->class->system, file, parent);
2755 	if (!e_events)
2756 		return -ENOMEM;
2757 
2758 	nr_entries = ARRAY_SIZE(event_entries);
2759 
2760 	name = trace_event_name(call);
2761 	ei = eventfs_create_dir(name, e_events, event_entries, nr_entries, file);
2762 	if (IS_ERR(ei)) {
2763 		pr_warn("Could not create tracefs '%s' directory\n", name);
2764 		return -1;
2765 	}
2766 
2767 	file->ei = ei;
2768 
2769 	ret = event_define_fields(call);
2770 	if (ret < 0) {
2771 		pr_warn("Could not initialize trace point events/%s\n", name);
2772 		return ret;
2773 	}
2774 
2775 	/* Gets decremented on freeing of the "enable" file */
2776 	event_file_get(file);
2777 
2778 	return 0;
2779 }
2780 
2781 static void remove_event_from_tracers(struct trace_event_call *call)
2782 {
2783 	struct trace_event_file *file;
2784 	struct trace_array *tr;
2785 
2786 	do_for_each_event_file_safe(tr, file) {
2787 		if (file->event_call != call)
2788 			continue;
2789 
2790 		remove_event_file_dir(file);
2791 		/*
2792 		 * The do_for_each_event_file_safe() is
2793 		 * a double loop. After finding the call for this
2794 		 * trace_array, we use break to jump to the next
2795 		 * trace_array.
2796 		 */
2797 		break;
2798 	} while_for_each_event_file();
2799 }
2800 
2801 static void event_remove(struct trace_event_call *call)
2802 {
2803 	struct trace_array *tr;
2804 	struct trace_event_file *file;
2805 
2806 	do_for_each_event_file(tr, file) {
2807 		if (file->event_call != call)
2808 			continue;
2809 
2810 		if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
2811 			tr->clear_trace = true;
2812 
2813 		ftrace_event_enable_disable(file, 0);
2814 		/*
2815 		 * The do_for_each_event_file() is
2816 		 * a double loop. After finding the call for this
2817 		 * trace_array, we use break to jump to the next
2818 		 * trace_array.
2819 		 */
2820 		break;
2821 	} while_for_each_event_file();
2822 
2823 	if (call->event.funcs)
2824 		__unregister_trace_event(&call->event);
2825 	remove_event_from_tracers(call);
2826 	list_del(&call->list);
2827 }
2828 
2829 static int event_init(struct trace_event_call *call)
2830 {
2831 	int ret = 0;
2832 	const char *name;
2833 
2834 	name = trace_event_name(call);
2835 	if (WARN_ON(!name))
2836 		return -EINVAL;
2837 
2838 	if (call->class->raw_init) {
2839 		ret = call->class->raw_init(call);
2840 		if (ret < 0 && ret != -ENOSYS)
2841 			pr_warn("Could not initialize trace events/%s\n", name);
2842 	}
2843 
2844 	return ret;
2845 }
2846 
2847 static int
2848 __register_event(struct trace_event_call *call, struct module *mod)
2849 {
2850 	int ret;
2851 
2852 	ret = event_init(call);
2853 	if (ret < 0)
2854 		return ret;
2855 
2856 	list_add(&call->list, &ftrace_events);
2857 	if (call->flags & TRACE_EVENT_FL_DYNAMIC)
2858 		atomic_set(&call->refcnt, 0);
2859 	else
2860 		call->module = mod;
2861 
2862 	return 0;
2863 }
2864 
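/*
 * Replace the eval (enum/sizeof) string at @ptr with its numeric value, in
 * place. Illustrative example: if @ptr points at "TRACE_FOO" and
 * map->eval_value is 3, the text becomes "3", the tail of the string is
 * shifted down, and a pointer just past the "3" is returned. Returns NULL if
 * the eval string is shorter than the value's decimal form.
 */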
2865 static char *eval_replace(char *ptr, struct trace_eval_map *map, int len)
2866 {
2867 	int rlen;
2868 	int elen;
2869 
2870 	/* Find the length of the eval value as a string */
2871 	elen = snprintf(ptr, 0, "%ld", map->eval_value);
2872 	/* Make sure there's enough room to replace the string with the value */
2873 	if (len < elen)
2874 		return NULL;
2875 
2876 	snprintf(ptr, elen + 1, "%ld", map->eval_value);
2877 
2878 	/* Get the rest of the string of ptr */
2879 	/* Get the length of the rest of the string, past the eval string */
2880 	memmove(ptr + elen, ptr + len, rlen);
2881 	/* Make sure we end the new string */
2882 	ptr[elen + rlen] = 0;
2883 
2884 	return ptr + elen;
2885 }
2886 
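/*
 * Walk the event's print_fmt and replace every standalone occurrence of the
 * eval (enum/sizeof) string with its numeric value, skipping over quoted
 * strings, numeric literals and struct member accesses (".", "->") along
 * the way.
 */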
2887 static void update_event_printk(struct trace_event_call *call,
2888 				struct trace_eval_map *map)
2889 {
2890 	char *ptr;
2891 	int quote = 0;
2892 	int len = strlen(map->eval_string);
2893 
2894 	for (ptr = call->print_fmt; *ptr; ptr++) {
2895 		if (*ptr == '\\') {
2896 			ptr++;
2897 			/* paranoid */
2898 			if (!*ptr)
2899 				break;
2900 			continue;
2901 		}
2902 		if (*ptr == '"') {
2903 			quote ^= 1;
2904 			continue;
2905 		}
2906 		if (quote)
2907 			continue;
2908 		if (isdigit(*ptr)) {
2909 			/* skip numbers */
2910 			do {
2911 				ptr++;
2912 				/* Check for alpha chars like ULL */
2913 			} while (isalnum(*ptr));
2914 			if (!*ptr)
2915 				break;
2916 			/*
2917 			 * A number must have some kind of delimiter after
2918 			 * it, and we can ignore that too.
2919 			 */
2920 			continue;
2921 		}
2922 		if (isalpha(*ptr) || *ptr == '_') {
2923 			if (strncmp(map->eval_string, ptr, len) == 0 &&
2924 			    !isalnum(ptr[len]) && ptr[len] != '_') {
2925 				ptr = eval_replace(ptr, map, len);
2926 				/* enum/sizeof string smaller than value */
2927 				if (WARN_ON_ONCE(!ptr))
2928 					return;
2929 				/*
2930 				 * No need to decrement here, as eval_replace()
2931 				 * returns the pointer to the character past
2932 				 * the eval, and two evals cannot be placed
2933 				 * back to back without something in between.
2934 				 * We can skip that something in between.
2935 				 */
2936 				continue;
2937 			}
2938 		skip_more:
2939 			do {
2940 				ptr++;
2941 			} while (isalnum(*ptr) || *ptr == '_');
2942 			if (!*ptr)
2943 				break;
2944 			/*
2945 			 * If what comes after this variable is a '.' or
2946 			 * '->' then we can continue to ignore that string.
2947 			 */
2948 			if (*ptr == '.' || (ptr[0] == '-' && ptr[1] == '>')) {
2949 				ptr += *ptr == '.' ? 1 : 2;
2950 				if (!*ptr)
2951 					break;
2952 				goto skip_more;
2953 			}
2954 			/*
2955 			 * Once again, we can skip the delimiter that came
2956 			 * after the string.
2957 			 */
2958 			continue;
2959 		}
2960 	}
2961 }
2962 
2963 static void add_str_to_module(struct module *module, char *str)
2964 {
2965 	struct module_string *modstr;
2966 
2967 	modstr = kmalloc(sizeof(*modstr), GFP_KERNEL);
2968 
2969 	/*
2970 	 * If we failed to allocate memory here, then we'll just
2971 	 * let the str memory leak when the module is removed.
2972 	 * If this fails to allocate, there are worse problems than
2973 	 * a leaked string on module removal.
2974 	 */
2975 	if (WARN_ON_ONCE(!modstr))
2976 		return;
2977 
2978 	modstr->module = module;
2979 	modstr->str = str;
2980 
2981 	list_add(&modstr->next, &module_strings);
2982 }
2983 
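/*
 * Replace eval (enum/sizeof) strings used inside array bounds of field types,
 * e.g. (illustrative) "char buf[MAX_LEN]" becoming "char buf[64]", so the
 * resolved size shows up in the event's "format" file.
 */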
2984 static void update_event_fields(struct trace_event_call *call,
2985 				struct trace_eval_map *map)
2986 {
2987 	struct ftrace_event_field *field;
2988 	struct list_head *head;
2989 	char *ptr;
2990 	char *str;
2991 	int len = strlen(map->eval_string);
2992 
2993 	/* Dynamic events should never have field maps */
2994 	if (WARN_ON_ONCE(call->flags & TRACE_EVENT_FL_DYNAMIC))
2995 		return;
2996 
2997 	head = trace_get_fields(call);
2998 	list_for_each_entry(field, head, link) {
2999 		ptr = strchr(field->type, '[');
3000 		if (!ptr)
3001 			continue;
3002 		ptr++;
3003 
3004 		if (!isalpha(*ptr) && *ptr != '_')
3005 			continue;
3006 
3007 		if (strncmp(map->eval_string, ptr, len) != 0)
3008 			continue;
3009 
3010 		str = kstrdup(field->type, GFP_KERNEL);
3011 		if (WARN_ON_ONCE(!str))
3012 			return;
3013 		ptr = str + (ptr - field->type);
3014 		ptr = eval_replace(ptr, map, len);
3015 		/* enum/sizeof string smaller than value */
3016 		if (WARN_ON_ONCE(!ptr)) {
3017 			kfree(str);
3018 			continue;
3019 		}
3020 
3021 		/*
3022 		 * If the event is part of a module, then we need to free the string
3023 		 * when the module is removed. Otherwise, it will stay allocated
3024 		 * until a reboot.
3025 		 */
3026 		if (call->module)
3027 			add_str_to_module(call->module, str);
3028 
3029 		field->type = str;
3030 	}
3031 }
3032 
3033 void trace_event_eval_update(struct trace_eval_map **map, int len)
3034 {
3035 	struct trace_event_call *call, *p;
3036 	const char *last_system = NULL;
3037 	bool first = false;
3038 	int last_i;
3039 	int i;
3040 
3041 	down_write(&trace_event_sem);
3042 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
3043 		/* events are usually grouped together by system */
3044 		if (!last_system || call->class->system != last_system) {
3045 			first = true;
3046 			last_i = 0;
3047 			last_system = call->class->system;
3048 		}
3049 
3050 		/*
3051 		 * Since calls are grouped by systems, the likelihood that the
3052 		 * next call in the iteration belongs to the same system as the
3053 		 * previous call is high. As an optimization, we skip searching
3054 		 * for a map[] that matches the call's system if the last call
3055 		 * was from the same system. That's what last_i is for. If the
3056 		 * call has the same system as the previous call, then last_i
3057 		 * will be the index of the first map[] that has a matching
3058 		 * system.
3059 		 */
3060 		for (i = last_i; i < len; i++) {
3061 			if (call->class->system == map[i]->system) {
3062 				/* Save the first system if need be */
3063 				if (first) {
3064 					last_i = i;
3065 					first = false;
3066 				}
3067 				update_event_printk(call, map[i]);
3068 				update_event_fields(call, map[i]);
3069 			}
3070 		}
3071 		cond_resched();
3072 	}
3073 	up_write(&trace_event_sem);
3074 }
3075 
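/*
 * Return true if the event's system appears in the comma/space separated
 * @systems string (a NULL @systems matches everything). Illustrative example:
 * system "sched" matches "irq,sched timer" but not "sched_ext", as the match
 * must be bounded by separators or the end of the string.
 */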
3076 static bool event_in_systems(struct trace_event_call *call,
3077 			     const char *systems)
3078 {
3079 	const char *system;
3080 	const char *p;
3081 
3082 	if (!systems)
3083 		return true;
3084 
3085 	system = call->class->system;
3086 	p = strstr(systems, system);
3087 	if (!p)
3088 		return false;
3089 
3090 	if (p != systems && !isspace(*(p - 1)) && *(p - 1) != ',')
3091 		return false;
3092 
3093 	p += strlen(system);
3094 	return !*p || isspace(*p) || *p == ',';
3095 }
3096 
3097 #ifdef CONFIG_HIST_TRIGGERS
3098 /*
3099  * Wake up waiters on hist_poll_wq from irq_work, because the hist trigger
3100  * may fire in any context.
3101  */
3102 static void hist_poll_event_irq_work(struct irq_work *work)
3103 {
3104 	wake_up_all(&hist_poll_wq);
3105 }
3106 
3107 DEFINE_IRQ_WORK(hist_poll_work, hist_poll_event_irq_work);
3108 DECLARE_WAIT_QUEUE_HEAD(hist_poll_wq);
3109 #endif
3110 
3111 static struct trace_event_file *
3112 trace_create_new_event(struct trace_event_call *call,
3113 		       struct trace_array *tr)
3114 {
3115 	struct trace_pid_list *no_pid_list;
3116 	struct trace_pid_list *pid_list;
3117 	struct trace_event_file *file;
3118 	unsigned int first;
3119 
3120 	if (!event_in_systems(call, tr->system_names))
3121 		return NULL;
3122 
3123 	file = kmem_cache_alloc(file_cachep, GFP_TRACE);
3124 	if (!file)
3125 		return ERR_PTR(-ENOMEM);
3126 
3127 	pid_list = rcu_dereference_protected(tr->filtered_pids,
3128 					     lockdep_is_held(&event_mutex));
3129 	no_pid_list = rcu_dereference_protected(tr->filtered_no_pids,
3130 					     lockdep_is_held(&event_mutex));
3131 
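	/*
	 * trace_pid_list_first() returns 0 when the list has at least one
	 * pid set. If either pid list is populated, mark this event as
	 * needing the pid filter checks.
	 */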
3132 	if (!trace_pid_list_first(pid_list, &first) ||
3133 	    !trace_pid_list_first(no_pid_list, &first))
3134 		file->flags |= EVENT_FILE_FL_PID_FILTER;
3135 
3136 	file->event_call = call;
3137 	file->tr = tr;
3138 	atomic_set(&file->sm_ref, 0);
3139 	atomic_set(&file->tm_ref, 0);
3140 	INIT_LIST_HEAD(&file->triggers);
3141 	list_add(&file->list, &tr->events);
3142 	refcount_set(&file->ref, 1);
3143 
3144 	return file;
3145 }
3146 
3147 #define MAX_BOOT_TRIGGERS 32
3148 
3149 static struct boot_triggers {
3150 	const char		*event;
3151 	char			*trigger;
3152 } bootup_triggers[MAX_BOOT_TRIGGERS];
3153 
3154 static char bootup_trigger_buf[COMMAND_LINE_SIZE];
3155 static int nr_boot_triggers;
3156 
3157 static __init int setup_trace_triggers(char *str)
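/*
 * Parse the "trace_trigger=" boot parameter. Each comma separated entry has
 * the form <event>.<trigger>, e.g. (illustrative)
 * "trace_trigger=sched_switch.stacktrace:5". The recorded triggers are
 * applied to the matching events as they are created during boot.
 */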
3158 {
3159 	char *trigger;
3160 	char *buf;
3161 	int i;
3162 
3163 	strscpy(bootup_trigger_buf, str, COMMAND_LINE_SIZE);
3164 	trace_set_ring_buffer_expanded(NULL);
3165 	disable_tracing_selftest("running event triggers");
3166 
3167 	buf = bootup_trigger_buf;
3168 	for (i = 0; i < MAX_BOOT_TRIGGERS; i++) {
3169 		trigger = strsep(&buf, ",");
3170 		if (!trigger)
3171 			break;
3172 		bootup_triggers[i].event = strsep(&trigger, ".");
3173 		bootup_triggers[i].trigger = trigger;
3174 		if (!bootup_triggers[i].trigger)
3175 			break;
3176 	}
3177 
3178 	nr_boot_triggers = i;
3179 	return 1;
3180 }
3181 __setup("trace_trigger=", setup_trace_triggers);
3182 
3183 /* Add an event to a trace directory */
3184 static int
3185 __trace_add_new_event(struct trace_event_call *call, struct trace_array *tr)
3186 {
3187 	struct trace_event_file *file;
3188 
3189 	file = trace_create_new_event(call, tr);
3190 	/*
3191 	 * trace_create_new_event() returns ERR_PTR(-ENOMEM) if the
3192 	 * allocation failed, or NULL if the event is not part of the tr->system_names.
3193 	 * When the event is not part of the tr->system_names, return zero, not
3194 	 * an error.
3195 	 */
3196 	if (!file)
3197 		return 0;
3198 
3199 	if (IS_ERR(file))
3200 		return PTR_ERR(file);
3201 
3202 	if (eventdir_initialized)
3203 		return event_create_dir(tr->event_dir, file);
3204 	else
3205 		return event_define_fields(call);
3206 }
3207 
3208 static void trace_early_triggers(struct trace_event_file *file, const char *name)
3209 {
3210 	int ret;
3211 	int i;
3212 
3213 	for (i = 0; i < nr_boot_triggers; i++) {
3214 		if (strcmp(name, bootup_triggers[i].event))
3215 			continue;
3216 		mutex_lock(&event_mutex);
3217 		ret = trigger_process_regex(file, bootup_triggers[i].trigger);
3218 		mutex_unlock(&event_mutex);
3219 		if (ret)
3220 			pr_err("Failed to register trigger '%s' on event %s\n",
3221 			       bootup_triggers[i].trigger,
3222 			       bootup_triggers[i].event);
3223 	}
3224 }
3225 
3226 /*
3227  * Just create a descriptor for early init. A descriptor is required
3228  * for enabling events at boot. We want to enable events before
3229  * the filesystem is initialized.
3230  */
3231 static int
3232 __trace_early_add_new_event(struct trace_event_call *call,
3233 			    struct trace_array *tr)
3234 {
3235 	struct trace_event_file *file;
3236 	int ret;
3237 
3238 	file = trace_create_new_event(call, tr);
3239 	/*
3240 	 * trace_create_new_event() returns ERR_PTR(-ENOMEM) if the
3241 	 * allocation failed, or NULL if the event is not part of the tr->system_names.
3242 	 * When the event is not part of the tr->system_names, return zero, not
3243 	 * an error.
3244 	 */
3245 	if (!file)
3246 		return 0;
3247 
3248 	if (IS_ERR(file))
3249 		return PTR_ERR(file);
3250 
3251 	ret = event_define_fields(call);
3252 	if (ret)
3253 		return ret;
3254 
3255 	trace_early_triggers(file, trace_event_name(call));
3256 
3257 	return 0;
3258 }
3259 
3260 struct ftrace_module_file_ops;
3261 static void __add_event_to_tracers(struct trace_event_call *call);
3262 
3263 /* Add an additional event_call dynamically */
3264 int trace_add_event_call(struct trace_event_call *call)
3265 {
3266 	int ret;
3267 	lockdep_assert_held(&event_mutex);
3268 
3269 	guard(mutex)(&trace_types_lock);
3270 
3271 	ret = __register_event(call, NULL);
3272 	if (ret < 0)
3273 		return ret;
3274 
3275 	__add_event_to_tracers(call);
3276 	return ret;
3277 }
3278 EXPORT_SYMBOL_GPL(trace_add_event_call);
3279 
3280 /*
3281  * Must be called with trace_types_lock, event_mutex and trace_event_sem
3282  * held.
3283  */
3284 static void __trace_remove_event_call(struct trace_event_call *call)
3285 {
3286 	event_remove(call);
3287 	trace_destroy_fields(call);
3288 }
3289 
3290 static int probe_remove_event_call(struct trace_event_call *call)
3291 {
3292 	struct trace_array *tr;
3293 	struct trace_event_file *file;
3294 
3295 #ifdef CONFIG_PERF_EVENTS
3296 	if (call->perf_refcount)
3297 		return -EBUSY;
3298 #endif
3299 	do_for_each_event_file(tr, file) {
3300 		if (file->event_call != call)
3301 			continue;
3302 		/*
3303 		 * We can't rely on the ftrace_event_enable_disable(enable => 0)
3304 		 * that we are going to do, as EVENT_FILE_FL_SOFT_MODE can
3305 		 * suppress TRACE_REG_UNREGISTER.
3306 		 */
3307 		if (file->flags & EVENT_FILE_FL_ENABLED)
3308 			goto busy;
3309 
3310 		if (file->flags & EVENT_FILE_FL_WAS_ENABLED)
3311 			tr->clear_trace = true;
3312 		/*
3313 		 * The do_for_each_event_file() is
3314 		 * a double loop. After finding the call for this
3315 		 * trace_array, we use break to jump to the next
3316 		 * trace_array.
3317 		 */
3318 		break;
3319 	} while_for_each_event_file();
3320 
3321 	__trace_remove_event_call(call);
3322 
3323 	return 0;
3324  busy:
3325 	/* No need to clear the trace now */
3326 	list_for_each_entry(tr, &ftrace_trace_arrays, list) {
3327 		tr->clear_trace = false;
3328 	}
3329 	return -EBUSY;
3330 }
3331 
3332 /* Remove an event_call */
3333 int trace_remove_event_call(struct trace_event_call *call)
3334 {
3335 	int ret;
3336 
3337 	lockdep_assert_held(&event_mutex);
3338 
3339 	mutex_lock(&trace_types_lock);
3340 	down_write(&trace_event_sem);
3341 	ret = probe_remove_event_call(call);
3342 	up_write(&trace_event_sem);
3343 	mutex_unlock(&trace_types_lock);
3344 
3345 	return ret;
3346 }
3347 EXPORT_SYMBOL_GPL(trace_remove_event_call);
3348 
3349 #define for_each_event(event, start, end)			\
3350 	for (event = start;					\
3351 	     (unsigned long)event < (unsigned long)end;		\
3352 	     event++)
3353 
3354 #ifdef CONFIG_MODULES
3355 
3356 static void trace_module_add_events(struct module *mod)
3357 {
3358 	struct trace_event_call **call, **start, **end;
3359 
3360 	if (!mod->num_trace_events)
3361 		return;
3362 
3363 	/* Don't add infrastructure for mods without tracepoints */
3364 	if (trace_module_has_bad_taint(mod)) {
3365 		pr_err("%s: module has bad taint, not creating trace events\n",
3366 		       mod->name);
3367 		return;
3368 	}
3369 
3370 	start = mod->trace_events;
3371 	end = mod->trace_events + mod->num_trace_events;
3372 
3373 	for_each_event(call, start, end) {
3374 		__register_event(*call, mod);
3375 		__add_event_to_tracers(*call);
3376 	}
3377 }
3378 
3379 static void trace_module_remove_events(struct module *mod)
3380 {
3381 	struct trace_event_call *call, *p;
3382 	struct module_string *modstr, *m;
3383 
3384 	down_write(&trace_event_sem);
3385 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
3386 		if ((call->flags & TRACE_EVENT_FL_DYNAMIC) || !call->module)
3387 			continue;
3388 		if (call->module == mod)
3389 			__trace_remove_event_call(call);
3390 	}
3391 	/* Check for any strings allocated for this module */
3392 	list_for_each_entry_safe(modstr, m, &module_strings, next) {
3393 		if (modstr->module != mod)
3394 			continue;
3395 		list_del(&modstr->next);
3396 		kfree(modstr->str);
3397 		kfree(modstr);
3398 	}
3399 	up_write(&trace_event_sem);
3400 
3401 	/*
3402 	 * It is safest to reset the ring buffer if the module being unloaded
3403 	 * registered any events that were used. The only worry is if
3404 	 * a new module gets loaded, and takes on the same id as the events
3405 	 * of this module. When printing out the buffer, traced events left
3406 	 * over from this module may be passed to the new module events and
3407 	 * unexpected results may occur.
3408 	 */
3409 	tracing_reset_all_online_cpus_unlocked();
3410 }
3411 
3412 static int trace_module_notify(struct notifier_block *self,
3413 			       unsigned long val, void *data)
3414 {
3415 	struct module *mod = data;
3416 
3417 	mutex_lock(&event_mutex);
3418 	mutex_lock(&trace_types_lock);
3419 	switch (val) {
3420 	case MODULE_STATE_COMING:
3421 		trace_module_add_events(mod);
3422 		break;
3423 	case MODULE_STATE_GOING:
3424 		trace_module_remove_events(mod);
3425 		break;
3426 	}
3427 	mutex_unlock(&trace_types_lock);
3428 	mutex_unlock(&event_mutex);
3429 
3430 	return NOTIFY_OK;
3431 }
3432 
3433 static struct notifier_block trace_module_nb = {
3434 	.notifier_call = trace_module_notify,
3435 	.priority = 1, /* higher than trace.c module notify */
3436 };
3437 #endif /* CONFIG_MODULES */
3438 
3439 /* Create a new event directory structure for a trace directory. */
3440 static void
3441 __trace_add_event_dirs(struct trace_array *tr)
3442 {
3443 	struct trace_event_call *call;
3444 	int ret;
3445 
3446 	list_for_each_entry(call, &ftrace_events, list) {
3447 		ret = __trace_add_new_event(call, tr);
3448 		if (ret < 0)
3449 			pr_warn("Could not create directory for event %s\n",
3450 				trace_event_name(call));
3451 	}
3452 }
3453 
3454 /* Returns any file that matches the system and event */
3455 struct trace_event_file *
3456 __find_event_file(struct trace_array *tr, const char *system, const char *event)
3457 {
3458 	struct trace_event_file *file;
3459 	struct trace_event_call *call;
3460 	const char *name;
3461 
3462 	list_for_each_entry(file, &tr->events, list) {
3463 
3464 		call = file->event_call;
3465 		name = trace_event_name(call);
3466 
3467 		if (!name || !call->class)
3468 			continue;
3469 
3470 		if (strcmp(event, name) == 0 &&
3471 		    strcmp(system, call->class->system) == 0)
3472 			return file;
3473 	}
3474 	return NULL;
3475 }
3476 
3477 /* Returns valid trace event files that match system and event */
3478 struct trace_event_file *
3479 find_event_file(struct trace_array *tr, const char *system, const char *event)
3480 {
3481 	struct trace_event_file *file;
3482 
3483 	file = __find_event_file(tr, system, event);
3484 	if (!file || !file->event_call->class->reg ||
3485 	    file->event_call->flags & TRACE_EVENT_FL_IGNORE_ENABLE)
3486 		return NULL;
3487 
3488 	return file;
3489 }
3490 
3491 /**
3492  * trace_get_event_file - Find and return a trace event file
3493  * @instance: The name of the trace instance containing the event
3494  * @system: The name of the system containing the event
3495  * @event: The name of the event
3496  *
3497  * Return a trace event file given the trace instance name, trace
3498  * system, and trace event name.  If the instance name is NULL, it
3499  * refers to the top-level trace array.
3500  *
3501  * This function will look it up and return it if found, after calling
3502  * trace_array_get() to prevent the instance from going away, and
3503  * increment the event's module refcount to prevent it from being
3504  * removed.
3505  *
3506  * To release the file, call trace_put_event_file(), which will call
3507  * trace_array_put() and decrement the event's module refcount.
3508  *
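 * A minimal usage sketch (illustrative only; error handling trimmed and the
 * instance name is hypothetical):
 *
 *	file = trace_get_event_file("my_instance", "sched", "sched_switch");
 *	if (IS_ERR(file))
 *		return PTR_ERR(file);
 *	...
 *	trace_put_event_file(file);
 *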
3509  * Return: The trace event on success, ERR_PTR otherwise.
3510  */
3511 struct trace_event_file *trace_get_event_file(const char *instance,
3512 					      const char *system,
3513 					      const char *event)
3514 {
3515 	struct trace_array *tr = top_trace_array();
3516 	struct trace_event_file *file = NULL;
3517 	int ret = -EINVAL;
3518 
3519 	if (instance) {
3520 		tr = trace_array_find_get(instance);
3521 		if (!tr)
3522 			return ERR_PTR(-ENOENT);
3523 	} else {
3524 		ret = trace_array_get(tr);
3525 		if (ret)
3526 			return ERR_PTR(ret);
3527 	}
3528 
3529 	guard(mutex)(&event_mutex);
3530 
3531 	file = find_event_file(tr, system, event);
3532 	if (!file) {
3533 		trace_array_put(tr);
3534 		return ERR_PTR(-EINVAL);
3535 	}
3536 
3537 	/* Don't let event modules unload while in use */
3538 	ret = trace_event_try_get_ref(file->event_call);
3539 	if (!ret) {
3540 		trace_array_put(tr);
3541 		return ERR_PTR(-EBUSY);
3542 	}
3543 
3544 	return file;
3545 }
3546 EXPORT_SYMBOL_GPL(trace_get_event_file);
3547 
3548 /**
3549  * trace_put_event_file - Release a file from trace_get_event_file()
3550  * @file: The trace event file
3551  *
3552  * If a file was retrieved using trace_get_event_file(), this should
3553  * be called when it's no longer needed.  It will cancel the previous
3554  * trace_array_get() called by that function, and decrement the
3555  * event's module refcount.
3556  */
3557 void trace_put_event_file(struct trace_event_file *file)
3558 {
3559 	mutex_lock(&event_mutex);
3560 	trace_event_put_ref(file->event_call);
3561 	mutex_unlock(&event_mutex);
3562 
3563 	trace_array_put(file->tr);
3564 }
3565 EXPORT_SYMBOL_GPL(trace_put_event_file);
3566 
3567 #ifdef CONFIG_DYNAMIC_FTRACE
3568 
3569 /* Avoid typos */
3570 #define ENABLE_EVENT_STR	"enable_event"
3571 #define DISABLE_EVENT_STR	"disable_event"
3572 
3573 struct event_probe_data {
3574 	struct trace_event_file	*file;
3575 	unsigned long			count;
3576 	int				ref;
3577 	bool				enable;
3578 };
3579 
3580 static void update_event_probe(struct event_probe_data *data)
3581 {
3582 	if (data->enable)
3583 		clear_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
3584 	else
3585 		set_bit(EVENT_FILE_FL_SOFT_DISABLED_BIT, &data->file->flags);
3586 }
3587 
3588 static void
3589 event_enable_probe(unsigned long ip, unsigned long parent_ip,
3590 		   struct trace_array *tr, struct ftrace_probe_ops *ops,
3591 		   void *data)
3592 {
3593 	struct ftrace_func_mapper *mapper = data;
3594 	struct event_probe_data *edata;
3595 	void **pdata;
3596 
3597 	pdata = ftrace_func_mapper_find_ip(mapper, ip);
3598 	if (!pdata || !*pdata)
3599 		return;
3600 
3601 	edata = *pdata;
3602 	update_event_probe(edata);
3603 }
3604 
3605 static void
3606 event_enable_count_probe(unsigned long ip, unsigned long parent_ip,
3607 			 struct trace_array *tr, struct ftrace_probe_ops *ops,
3608 			 void *data)
3609 {
3610 	struct ftrace_func_mapper *mapper = data;
3611 	struct event_probe_data *edata;
3612 	void **pdata;
3613 
3614 	pdata = ftrace_func_mapper_find_ip(mapper, ip);
3615 	if (!pdata || !*pdata)
3616 		return;
3617 
3618 	edata = *pdata;
3619 
3620 	if (!edata->count)
3621 		return;
3622 
3623 	/* Skip if the event is already in the state we want to switch it to */
3624 	if (edata->enable == !(edata->file->flags & EVENT_FILE_FL_SOFT_DISABLED))
3625 		return;
3626 
3627 	if (edata->count != -1)
3628 		(edata->count)--;
3629 
3630 	update_event_probe(edata);
3631 }
3632 
3633 static int
3634 event_enable_print(struct seq_file *m, unsigned long ip,
3635 		   struct ftrace_probe_ops *ops, void *data)
3636 {
3637 	struct ftrace_func_mapper *mapper = data;
3638 	struct event_probe_data *edata;
3639 	void **pdata;
3640 
3641 	pdata = ftrace_func_mapper_find_ip(mapper, ip);
3642 
3643 	if (WARN_ON_ONCE(!pdata || !*pdata))
3644 		return 0;
3645 
3646 	edata = *pdata;
3647 
3648 	seq_printf(m, "%ps:", (void *)ip);
3649 
3650 	seq_printf(m, "%s:%s:%s",
3651 		   edata->enable ? ENABLE_EVENT_STR : DISABLE_EVENT_STR,
3652 		   edata->file->event_call->class->system,
3653 		   trace_event_name(edata->file->event_call));
3654 
3655 	if (edata->count == -1)
3656 		seq_puts(m, ":unlimited\n");
3657 	else
3658 		seq_printf(m, ":count=%ld\n", edata->count);
3659 
3660 	return 0;
3661 }
3662 
3663 static int
3664 event_enable_init(struct ftrace_probe_ops *ops, struct trace_array *tr,
3665 		  unsigned long ip, void *init_data, void **data)
3666 {
3667 	struct ftrace_func_mapper *mapper = *data;
3668 	struct event_probe_data *edata = init_data;
3669 	int ret;
3670 
3671 	if (!mapper) {
3672 		mapper = allocate_ftrace_func_mapper();
3673 		if (!mapper)
3674 			return -ENODEV;
3675 		*data = mapper;
3676 	}
3677 
3678 	ret = ftrace_func_mapper_add_ip(mapper, ip, edata);
3679 	if (ret < 0)
3680 		return ret;
3681 
3682 	edata->ref++;
3683 
3684 	return 0;
3685 }
3686 
3687 static int free_probe_data(void *data)
3688 {
3689 	struct event_probe_data *edata = data;
3690 
3691 	edata->ref--;
3692 	if (!edata->ref) {
3693 		/* Remove the SOFT_MODE flag */
3694 		__ftrace_event_enable_disable(edata->file, 0, 1);
3695 		trace_event_put_ref(edata->file->event_call);
3696 		kfree(edata);
3697 	}
3698 	return 0;
3699 }
3700 
3701 static void
3702 event_enable_free(struct ftrace_probe_ops *ops, struct trace_array *tr,
3703 		  unsigned long ip, void *data)
3704 {
3705 	struct ftrace_func_mapper *mapper = data;
3706 	struct event_probe_data *edata;
3707 
3708 	if (!ip) {
3709 		if (!mapper)
3710 			return;
3711 		free_ftrace_func_mapper(mapper, free_probe_data);
3712 		return;
3713 	}
3714 
3715 	edata = ftrace_func_mapper_remove_ip(mapper, ip);
3716 
3717 	if (WARN_ON_ONCE(!edata))
3718 		return;
3719 
3720 	if (WARN_ON_ONCE(edata->ref <= 0))
3721 		return;
3722 
3723 	free_probe_data(edata);
3724 }
3725 
3726 static struct ftrace_probe_ops event_enable_probe_ops = {
3727 	.func			= event_enable_probe,
3728 	.print			= event_enable_print,
3729 	.init			= event_enable_init,
3730 	.free			= event_enable_free,
3731 };
3732 
3733 static struct ftrace_probe_ops event_enable_count_probe_ops = {
3734 	.func			= event_enable_count_probe,
3735 	.print			= event_enable_print,
3736 	.init			= event_enable_init,
3737 	.free			= event_enable_free,
3738 };
3739 
3740 static struct ftrace_probe_ops event_disable_probe_ops = {
3741 	.func			= event_enable_probe,
3742 	.print			= event_enable_print,
3743 	.init			= event_enable_init,
3744 	.free			= event_enable_free,
3745 };
3746 
3747 static struct ftrace_probe_ops event_disable_count_probe_ops = {
3748 	.func			= event_enable_count_probe,
3749 	.print			= event_enable_print,
3750 	.init			= event_enable_init,
3751 	.free			= event_enable_free,
3752 };
3753 
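/*
 * Implements the "enable_event"/"disable_event" function-probe commands of
 * set_ftrace_filter. Illustrative usage (the traced function and event names
 * are examples):
 *
 *	echo 'schedule:enable_event:sched:sched_switch:5' > set_ftrace_filter
 *
 * arms a probe on schedule() that soft-enables the sched_switch event,
 * limited by the trailing count.
 */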
3754 static int
3755 event_enable_func(struct trace_array *tr, struct ftrace_hash *hash,
3756 		  char *glob, char *cmd, char *param, int enabled)
3757 {
3758 	struct trace_event_file *file;
3759 	struct ftrace_probe_ops *ops;
3760 	struct event_probe_data *data;
3761 	unsigned long count = -1;
3762 	const char *system;
3763 	const char *event;
3764 	char *number;
3765 	bool enable;
3766 	int ret;
3767 
3768 	if (!tr)
3769 		return -ENODEV;
3770 
3771 	/* hash funcs only work with set_ftrace_filter */
3772 	if (!enabled || !param)
3773 		return -EINVAL;
3774 
3775 	system = strsep(&param, ":");
3776 	if (!param)
3777 		return -EINVAL;
3778 
3779 	event = strsep(&param, ":");
3780 
3781 	guard(mutex)(&event_mutex);
3782 
3783 	file = find_event_file(tr, system, event);
3784 	if (!file)
3785 		return -EINVAL;
3786 
3787 	enable = strcmp(cmd, ENABLE_EVENT_STR) == 0;
3788 
3789 	if (enable)
3790 		ops = param ? &event_enable_count_probe_ops : &event_enable_probe_ops;
3791 	else
3792 		ops = param ? &event_disable_count_probe_ops : &event_disable_probe_ops;
3793 
3794 	if (glob[0] == '!')
3795 		return unregister_ftrace_function_probe_func(glob+1, tr, ops);
3796 
3797 	if (param) {
3798 		number = strsep(&param, ":");
3799 
3800 		if (!strlen(number))
3801 			return -EINVAL;
3802 
3803 		/*
3804 		 * We use the callback data field (which is a pointer)
3805 		 * as our counter.
3806 		 */
3807 		ret = kstrtoul(number, 0, &count);
3808 		if (ret)
3809 			return ret;
3810 	}
3811 
3812 	/* Don't let event modules unload while probe registered */
3813 	ret = trace_event_try_get_ref(file->event_call);
3814 	if (!ret)
3815 		return -EBUSY;
3816 
3817 	ret = __ftrace_event_enable_disable(file, 1, 1);
3818 	if (ret < 0)
3819 		goto out_put;
3820 
3821 	ret = -ENOMEM;
3822 	data = kzalloc(sizeof(*data), GFP_KERNEL);
3823 	if (!data)
3824 		goto out_put;
3825 
3826 	data->enable = enable;
3827 	data->count = count;
3828 	data->file = file;
3829 
3830 	ret = register_ftrace_function_probe(glob, tr, ops, data);
3831 	/*
3832 	 * On success, the above returns the number of functions enabled,
3833 	 * but if it didn't find any functions it returns zero.
3834 	 * Consider no functions a failure too.
3835 	 */
3836 
3837 	/* Just return zero, not the number of enabled functions */
3838 	if (ret > 0)
3839 		return 0;
3840 
3841 	kfree(data);
3842 
3843 	if (!ret)
3844 		ret = -ENOENT;
3845 
3846 	__ftrace_event_enable_disable(file, 0, 1);
3847  out_put:
3848 	trace_event_put_ref(file->event_call);
3849 	return ret;
3850 }
3851 
3852 static struct ftrace_func_command event_enable_cmd = {
3853 	.name			= ENABLE_EVENT_STR,
3854 	.func			= event_enable_func,
3855 };
3856 
3857 static struct ftrace_func_command event_disable_cmd = {
3858 	.name			= DISABLE_EVENT_STR,
3859 	.func			= event_enable_func,
3860 };
3861 
3862 static __init int register_event_cmds(void)
3863 {
3864 	int ret;
3865 
3866 	ret = register_ftrace_command(&event_enable_cmd);
3867 	if (WARN_ON(ret < 0))
3868 		return ret;
3869 	ret = register_ftrace_command(&event_disable_cmd);
3870 	if (WARN_ON(ret < 0))
3871 		unregister_ftrace_command(&event_enable_cmd);
3872 	return ret;
3873 }
3874 #else
3875 static inline int register_event_cmds(void) { return 0; }
3876 #endif /* CONFIG_DYNAMIC_FTRACE */
3877 
3878 /*
3879  * The top level array and trace arrays created by boot-time tracing
3880  * have already had their trace_event_file descriptors created in order
3881  * to allow for early events to be recorded.
3882  * This function is called after the tracefs has been initialized,
3883  * and we now have to create the files associated with the events.
3884  */
3885 static void __trace_early_add_event_dirs(struct trace_array *tr)
3886 {
3887 	struct trace_event_file *file;
3888 	int ret;
3889 
3890 
3891 	list_for_each_entry(file, &tr->events, list) {
3892 		ret = event_create_dir(tr->event_dir, file);
3893 		if (ret < 0)
3894 			pr_warn("Could not create directory for event %s\n",
3895 				trace_event_name(file->event_call));
3896 	}
3897 }
3898 
3899 /*
3900  * For early boot up, the top trace array and the trace arrays created
3901  * by boot-time tracing need to have a list of events that can be
3902  * enabled. This must be done before the filesystem is set up in order
3903  * to allow events to be traced early.
3904  */
3905 void __trace_early_add_events(struct trace_array *tr)
3906 {
3907 	struct trace_event_call *call;
3908 	int ret;
3909 
3910 	list_for_each_entry(call, &ftrace_events, list) {
3911 		/* Early boot up should not have any modules loaded */
3912 		if (!(call->flags & TRACE_EVENT_FL_DYNAMIC) &&
3913 		    WARN_ON_ONCE(call->module))
3914 			continue;
3915 
3916 		ret = __trace_early_add_new_event(call, tr);
3917 		if (ret < 0)
3918 			pr_warn("Could not create early event %s\n",
3919 				trace_event_name(call));
3920 	}
3921 }
3922 
3923 /* Remove the event directory structure for a trace directory. */
3924 static void
3925 __trace_remove_event_dirs(struct trace_array *tr)
3926 {
3927 	struct trace_event_file *file, *next;
3928 
3929 	list_for_each_entry_safe(file, next, &tr->events, list)
3930 		remove_event_file_dir(file);
3931 }
3932 
3933 static void __add_event_to_tracers(struct trace_event_call *call)
3934 {
3935 	struct trace_array *tr;
3936 
3937 	list_for_each_entry(tr, &ftrace_trace_arrays, list)
3938 		__trace_add_new_event(call, tr);
3939 }
3940 
3941 extern struct trace_event_call *__start_ftrace_events[];
3942 extern struct trace_event_call *__stop_ftrace_events[];
3943 
3944 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
3945 
3946 static __init int setup_trace_event(char *str)
3947 {
3948 	strscpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
3949 	trace_set_ring_buffer_expanded(NULL);
3950 	disable_tracing_selftest("running event tracing");
3951 
3952 	return 1;
3953 }
3954 __setup("trace_event=", setup_trace_event);
3955 
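/*
 * eventfs callback for the top level "events" directory: resolves its
 * "enable", "header_page" and "header_event" files to the matching
 * file_operations and modes when they are looked up.
 */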
3956 static int events_callback(const char *name, umode_t *mode, void **data,
3957 			   const struct file_operations **fops)
3958 {
3959 	if (strcmp(name, "enable") == 0) {
3960 		*mode = TRACE_MODE_WRITE;
3961 		*fops = &ftrace_tr_enable_fops;
3962 		return 1;
3963 	}
3964 
3965 	if (strcmp(name, "header_page") == 0) {
3966 		*mode = TRACE_MODE_READ;
3967 		*fops = &ftrace_show_header_page_fops;
3968 
3969 	} else if (strcmp(name, "header_event") == 0) {
3970 		*mode = TRACE_MODE_READ;
3971 		*fops = &ftrace_show_header_event_fops;
3972 	} else
3973 		return 0;
3974 
3975 	return 1;
3976 }
3977 
3978 /* Expects to have event_mutex held when called */
3979 static int
3980 create_event_toplevel_files(struct dentry *parent, struct trace_array *tr)
3981 {
3982 	struct eventfs_inode *e_events;
3983 	struct dentry *entry;
3984 	int nr_entries;
3985 	static struct eventfs_entry events_entries[] = {
3986 		{
3987 			.name		= "enable",
3988 			.callback	= events_callback,
3989 		},
3990 		{
3991 			.name		= "header_page",
3992 			.callback	= events_callback,
3993 		},
3994 		{
3995 			.name		= "header_event",
3996 			.callback	= events_callback,
3997 		},
3998 	};
3999 
4000 	entry = trace_create_file("set_event", TRACE_MODE_WRITE, parent,
4001 				  tr, &ftrace_set_event_fops);
4002 	if (!entry)
4003 		return -ENOMEM;
4004 
4005 	nr_entries = ARRAY_SIZE(events_entries);
4006 
4007 	e_events = eventfs_create_events_dir("events", parent, events_entries,
4008 					     nr_entries, tr);
4009 	if (IS_ERR(e_events)) {
4010 		pr_warn("Could not create tracefs 'events' directory\n");
4011 		return -ENOMEM;
4012 	}
4013 
4014 	/* These are not as crucial, just warn if they are not created */
4015 
4016 	trace_create_file("set_event_pid", TRACE_MODE_WRITE, parent,
4017 			  tr, &ftrace_set_event_pid_fops);
4018 
4019 	trace_create_file("set_event_notrace_pid",
4020 			  TRACE_MODE_WRITE, parent, tr,
4021 			  &ftrace_set_event_notrace_pid_fops);
4022 
4023 	tr->event_dir = e_events;
4024 
4025 	return 0;
4026 }
4027 
4028 /**
4029  * event_trace_add_tracer - add an instance of a trace_array to events
4030  * @parent: The parent dentry to place the files/directories for events in
4031  * @tr: The trace array associated with these events
4032  *
4033  * When a new instance is created, it needs to set up its events
4034  * directory, as well as other files associated with events. It also
4035  * creates the event hierarchy in the @parent/events directory.
4036  *
4037  * Returns 0 on success.
4038  *
4039  * Must be called with event_mutex held.
4040  */
4041 int event_trace_add_tracer(struct dentry *parent, struct trace_array *tr)
4042 {
4043 	int ret;
4044 
4045 	lockdep_assert_held(&event_mutex);
4046 
4047 	ret = create_event_toplevel_files(parent, tr);
4048 	if (ret)
4049 		goto out;
4050 
4051 	down_write(&trace_event_sem);
4052 	/* If tr already has the event list, it was initialized in early boot. */
4053 	if (unlikely(!list_empty(&tr->events)))
4054 		__trace_early_add_event_dirs(tr);
4055 	else
4056 		__trace_add_event_dirs(tr);
4057 	up_write(&trace_event_sem);
4058 
4059  out:
4060 	return ret;
4061 }
4062 
4063 /*
4064  * The top trace array already had its file descriptors created.
4065  * Now the files themselves need to be created.
4066  */
4067 static __init int
4068 early_event_add_tracer(struct dentry *parent, struct trace_array *tr)
4069 {
4070 	int ret;
4071 
4072 	guard(mutex)(&event_mutex);
4073 
4074 	ret = create_event_toplevel_files(parent, tr);
4075 	if (ret)
4076 		return ret;
4077 
4078 	down_write(&trace_event_sem);
4079 	__trace_early_add_event_dirs(tr);
4080 	up_write(&trace_event_sem);
4081 
4082 	return 0;
4083 }
4084 
4085 /* Must be called with event_mutex held */
4086 int event_trace_del_tracer(struct trace_array *tr)
4087 {
4088 	lockdep_assert_held(&event_mutex);
4089 
4090 	/* Disable any event triggers and associated soft-disabled events */
4091 	clear_event_triggers(tr);
4092 
4093 	/* Clear the pid list */
4094 	__ftrace_clear_event_pids(tr, TRACE_PIDS | TRACE_NO_PIDS);
4095 
4096 	/* Disable any running events */
4097 	__ftrace_set_clr_event_nolock(tr, NULL, NULL, NULL, 0);
4098 
4099 	/* Make sure no more events are being executed */
4100 	tracepoint_synchronize_unregister();
4101 
4102 	down_write(&trace_event_sem);
4103 	__trace_remove_event_dirs(tr);
4104 	eventfs_remove_events_dir(tr->event_dir);
4105 	up_write(&trace_event_sem);
4106 
4107 	tr->event_dir = NULL;
4108 
4109 	return 0;
4110 }
4111 
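/* Create the slab caches used to allocate event fields and event files */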
4112 static __init int event_trace_memsetup(void)
4113 {
4114 	field_cachep = KMEM_CACHE(ftrace_event_field, SLAB_PANIC);
4115 	file_cachep = KMEM_CACHE(trace_event_file, SLAB_PANIC);
4116 	return 0;
4117 }
4118 
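/*
 * Enable the events listed in @buf (a comma separated list, usually from
 * the "trace_event=" boot option). If @disable_first is set, each event is
 * disabled before being enabled, which is required to restart the syscall
 * events.
 */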
4119 __init void
4120 early_enable_events(struct trace_array *tr, char *buf, bool disable_first)
4121 {
4122 	char *token;
4123 	int ret;
4124 
4125 	while (true) {
4126 		token = strsep(&buf, ",");
4127 
4128 		if (!token)
4129 			break;
4130 
4131 		if (*token) {
4132 			/* Restarting syscalls requires that we stop them first */
4133 			if (disable_first)
4134 				ftrace_set_clr_event(tr, token, 0);
4135 
4136 			ret = ftrace_set_clr_event(tr, token, 1);
4137 			if (ret)
4138 				pr_warn("Failed to enable trace event: %s\n", token);
4139 		}
4140 
4141 		/* Put back the comma to allow this to be called again */
4142 		if (buf)
4143 			*(buf - 1) = ',';
4144 	}
4145 }
4146 
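/*
 * Initialize all built-in events and add them to the ftrace_events list,
 * register the trigger commands, create the early event files for the top
 * level trace array and enable any events requested on the command line.
 */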
4147 static __init int event_trace_enable(void)
4148 {
4149 	struct trace_array *tr = top_trace_array();
4150 	struct trace_event_call **iter, *call;
4151 	int ret;
4152 
4153 	if (!tr)
4154 		return -ENODEV;
4155 
4156 	for_each_event(iter, __start_ftrace_events, __stop_ftrace_events) {
4157 
4158 		call = *iter;
4159 		ret = event_init(call);
4160 		if (!ret)
4161 			list_add(&call->list, &ftrace_events);
4162 	}
4163 
4164 	register_trigger_cmds();
4165 
4166 	/*
4167 	 * We need the top trace array to have a working set of trace
4168 	 * points at early init, before the debug files and directories
4169 	 * are created. Create the file entries now, and attach them
4170 	 * to the actual file dentries later.
4171 	 */
4172 	__trace_early_add_events(tr);
4173 
4174 	early_enable_events(tr, bootup_event_buf, false);
4175 
4176 	trace_printk_start_comm();
4177 
4178 	register_event_cmds();
4179 
4181 	return 0;
4182 }
4183 
4184 /*
4185  * event_trace_enable() is called from trace_event_init() first to
4186  * initialize events and perhaps start any events that are on the
4187  * command line. Unfortunately, some events will not start this early,
4188  * like the system call tracepoints that need to set the
4189  * %SYSCALL_WORK_SYSCALL_TRACEPOINT flag of pid 1. Since
4190  * event_trace_enable() is called before pid 1 starts, that flag is
4191  * never set, so the syscall tracepoints are never reached even though
4192  * the events are enabled (and do nothing). Enable them again here.
4193  */
4194 static __init int event_trace_enable_again(void)
4195 {
4196 	struct trace_array *tr;
4197 
4198 	tr = top_trace_array();
4199 	if (!tr)
4200 		return -ENODEV;
4201 
4202 	early_enable_events(tr, bootup_event_buf, true);
4203 
4204 	return 0;
4205 }
4206 
4207 early_initcall(event_trace_enable_again);
4208 
4209 /* Init fields which are not related to tracefs */
4210 static __init int event_trace_init_fields(void)
4211 {
4212 	if (trace_define_generic_fields())
4213 		pr_warn("tracing: Failed to allocate generic fields\n");
4214 
4215 	if (trace_define_common_fields())
4216 		pr_warn("tracing: Failed to allocate common fields\n");
4217 
4218 	return 0;
4219 }
4220 
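/*
 * Called once tracefs is available: create the "available_events" file and
 * the events directory for the top level trace array, and register the
 * trace events module notifier.
 */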
4221 __init int event_trace_init(void)
4222 {
4223 	struct trace_array *tr;
4224 	int ret;
4225 
4226 	tr = top_trace_array();
4227 	if (!tr)
4228 		return -ENODEV;
4229 
4230 	trace_create_file("available_events", TRACE_MODE_READ,
4231 			  NULL, tr, &ftrace_avail_fops);
4232 
4233 	ret = early_event_add_tracer(NULL, tr);
4234 	if (ret)
4235 		return ret;
4236 
4237 #ifdef CONFIG_MODULES
4238 	ret = register_module_notifier(&trace_module_nb);
4239 	if (ret)
4240 		pr_warn("Failed to register trace events module notifier\n");
4241 #endif
4242 
4243 	eventdir_initialized = true;
4244 
4245 	return 0;
4246 }
4247 
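/*
 * Early initialization of event tracing: set up the slab caches, the
 * syscall event support, the built-in events and the generic and common
 * fields.
 */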
4248 void __init trace_event_init(void)
4249 {
4250 	event_trace_memsetup();
4251 	init_ftrace_syscalls();
4252 	event_trace_enable();
4253 	event_trace_init_fields();
4254 }
4255 
4256 #ifdef CONFIG_EVENT_TRACE_STARTUP_TEST
4257 
4258 static DEFINE_SPINLOCK(test_spinlock);
4259 static DEFINE_SPINLOCK(test_spinlock_irq);
4260 static DEFINE_MUTEX(test_mutex);
4261 
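/*
 * Work function run on each CPU during the self test: take and release
 * spinlocks and a mutex and sleep briefly, which may generate events.
 */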
4262 static __init void test_work(struct work_struct *dummy)
4263 {
4264 	spin_lock(&test_spinlock);
4265 	spin_lock_irq(&test_spinlock_irq);
4266 	udelay(1);
4267 	spin_unlock_irq(&test_spinlock_irq);
4268 	spin_unlock(&test_spinlock);
4269 
4270 	mutex_lock(&test_mutex);
4271 	msleep(1);
4272 	mutex_unlock(&test_mutex);
4273 }
4274 
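/*
 * Self test thread: allocate some memory, run test_work() on every CPU and
 * then wait to be stopped, exercising code that may generate events.
 */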
4275 static __init int event_test_thread(void *unused)
4276 {
4277 	void *test_malloc;
4278 
4279 	test_malloc = kmalloc(1234, GFP_KERNEL);
4280 	if (!test_malloc)
4281 		pr_info("failed to kmalloc\n");
4282 
4283 	schedule_on_each_cpu(test_work);
4284 
4285 	kfree(test_malloc);
4286 
4287 	set_current_state(TASK_INTERRUPTIBLE);
4288 	while (!kthread_should_stop()) {
4289 		schedule();
4290 		set_current_state(TASK_INTERRUPTIBLE);
4291 	}
4292 	__set_current_state(TASK_RUNNING);
4293 
4294 	return 0;
4295 }
4296 
4297 /*
4298  * Do various things that may trigger events.
4299  */
4300 static __init void event_test_stuff(void)
4301 {
4302 	struct task_struct *test_thread;
4303 
4304 	test_thread = kthread_run(event_test_thread, NULL, "test-events");
4305 	msleep(1);
4306 	kthread_stop(test_thread);
4307 }
4308 
4309 /*
4310  * For every trace event defined, we will test each trace point separately,
4311  * and then by groups, and finally all trace points.
4312  */
4313 static __init void event_trace_self_tests(void)
4314 {
4315 	struct trace_subsystem_dir *dir;
4316 	struct trace_event_file *file;
4317 	struct trace_event_call *call;
4318 	struct event_subsystem *system;
4319 	struct trace_array *tr;
4320 	int ret;
4321 
4322 	tr = top_trace_array();
4323 	if (!tr)
4324 		return;
4325 
4326 	pr_info("Running tests on trace events:\n");
4327 
4328 	list_for_each_entry(file, &tr->events, list) {
4329 
4330 		call = file->event_call;
4331 
4332 		/* Only test those that have a probe */
4333 		if (!call->class || !call->class->probe)
4334 			continue;
4335 
4336 /*
4337  * Testing syscall events here is pretty useless, but
4338  * we still do it if configured. It is, however, time consuming.
4339  * What we really need is a user thread to perform the
4340  * syscalls as we test.
4341  */
4342 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
4343 		if (call->class->system &&
4344 		    strcmp(call->class->system, "syscalls") == 0)
4345 			continue;
4346 #endif
4347 
4348 		pr_info("Testing event %s: ", trace_event_name(call));
4349 
4350 		/*
4351 		 * If an event is already enabled, someone is using
4352 		 * it and the self test should not be on.
4353 		 */
4354 		if (file->flags & EVENT_FILE_FL_ENABLED) {
4355 			pr_warn("Enabled event during self test!\n");
4356 			WARN_ON_ONCE(1);
4357 			continue;
4358 		}
4359 
4360 		ftrace_event_enable_disable(file, 1);
4361 		event_test_stuff();
4362 		ftrace_event_enable_disable(file, 0);
4363 
4364 		pr_cont("OK\n");
4365 	}
4366 
4367 	/* Now test at the sub system level */
4368 
4369 	pr_info("Running tests on trace event systems:\n");
4370 
4371 	list_for_each_entry(dir, &tr->systems, list) {
4372 
4373 		system = dir->subsystem;
4374 
4375 		/* the ftrace system is special, skip it */
4376 		if (strcmp(system->name, "ftrace") == 0)
4377 			continue;
4378 
4379 		pr_info("Testing event system %s: ", system->name);
4380 
4381 		ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 1);
4382 		if (WARN_ON_ONCE(ret)) {
4383 			pr_warn("error enabling system %s\n",
4384 				system->name);
4385 			continue;
4386 		}
4387 
4388 		event_test_stuff();
4389 
4390 		ret = __ftrace_set_clr_event(tr, NULL, system->name, NULL, 0);
4391 		if (WARN_ON_ONCE(ret)) {
4392 			pr_warn("error disabling system %s\n",
4393 				system->name);
4394 			continue;
4395 		}
4396 
4397 		pr_cont("OK\n");
4398 	}
4399 
4400 	/* Test with all events enabled */
4401 
4402 	pr_info("Running tests on all trace events:\n");
4403 	pr_info("Testing all events: ");
4404 
4405 	ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 1);
4406 	if (WARN_ON_ONCE(ret)) {
4407 		pr_warn("error enabling all events\n");
4408 		return;
4409 	}
4410 
4411 	event_test_stuff();
4412 
4413 	/* Now disable all events again */
4414 	ret = __ftrace_set_clr_event(tr, NULL, NULL, NULL, 0);
4415 	if (WARN_ON_ONCE(ret)) {
4416 		pr_warn("error disabling all events\n");
4417 		return;
4418 	}
4419 
4420 	pr_cont("OK\n");
4421 }
4422 
4423 #ifdef CONFIG_FUNCTION_TRACER
4424 
4425 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
4426 
4427 static struct trace_event_file event_trace_file __initdata;
4428 
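/*
 * Function tracer callback used by the self test: record a TRACE_FN entry
 * into the top level trace array's buffer. The per CPU counter guards
 * against recursion from functions called inside this callback.
 */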
4429 static void __init
4430 function_test_events_call(unsigned long ip, unsigned long parent_ip,
4431 			  struct ftrace_ops *op, struct ftrace_regs *regs)
4432 {
4433 	struct trace_buffer *buffer;
4434 	struct ring_buffer_event *event;
4435 	struct ftrace_entry *entry;
4436 	unsigned int trace_ctx;
4437 	long disabled;
4438 	int cpu;
4439 
4440 	trace_ctx = tracing_gen_ctx();
4441 	preempt_disable_notrace();
4442 	cpu = raw_smp_processor_id();
4443 	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
4444 
4445 	if (disabled != 1)
4446 		goto out;
4447 
4448 	event = trace_event_buffer_lock_reserve(&buffer, &event_trace_file,
4449 						TRACE_FN, sizeof(*entry),
4450 						trace_ctx);
4451 	if (!event)
4452 		goto out;
4453 	entry	= ring_buffer_event_data(event);
4454 	entry->ip			= ip;
4455 	entry->parent_ip		= parent_ip;
4456 
4457 	event_trigger_unlock_commit(&event_trace_file, buffer, event,
4458 				    entry, trace_ctx);
4459  out:
4460 	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
4461 	preempt_enable_notrace();
4462 }
4463 
4464 static struct ftrace_ops trace_ops __initdata  =
4465 {
4466 	.func = function_test_events_call,
4467 };
4468 
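/*
 * Run the event self tests again, this time with the function tracer
 * registered, so the two are exercised together.
 */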
4469 static __init void event_trace_self_test_with_function(void)
4470 {
4471 	int ret;
4472 
4473 	event_trace_file.tr = top_trace_array();
4474 	if (WARN_ON(!event_trace_file.tr))
4475 		return;
4476 
4477 	ret = register_ftrace_function(&trace_ops);
4478 	if (WARN_ON(ret < 0)) {
4479 		pr_info("Failed to enable function tracer for event tests\n");
4480 		return;
4481 	}
4482 	pr_info("Running tests again, along with the function tracer\n");
4483 	event_trace_self_tests();
4484 	unregister_ftrace_function(&trace_ops);
4485 }
4486 #else
4487 static __init void event_trace_self_test_with_function(void)
4488 {
4489 }
4490 #endif
4491 
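/* Run the self tests at late init, unless self tests have been disabled */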
4492 static __init int event_trace_self_tests_init(void)
4493 {
4494 	if (!tracing_selftest_disabled) {
4495 		event_trace_self_tests();
4496 		event_trace_self_test_with_function();
4497 	}
4498 
4499 	return 0;
4500 }
4501 
4502 late_initcall(event_trace_self_tests_init);
4503 
4504 #endif
4505