xref: /linux/kernel/trace/trace_events.c (revision 27258e448eb301cf89e351df87aa8cb916653bf2)
1 /*
2  * event tracer
3  *
4  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5  *
6  *  - Added format output of fields of the trace point.
7  *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
8  *
9  */
10 
11 #include <linux/workqueue.h>
12 #include <linux/spinlock.h>
13 #include <linux/kthread.h>
14 #include <linux/debugfs.h>
15 #include <linux/uaccess.h>
16 #include <linux/module.h>
17 #include <linux/ctype.h>
18 #include <linux/delay.h>
19 
20 #include <asm/setup.h>
21 
22 #include "trace_output.h"
23 
24 #undef TRACE_SYSTEM
25 #define TRACE_SYSTEM "TRACE_SYSTEM"
26 
27 DEFINE_MUTEX(event_mutex);
28 
29 LIST_HEAD(ftrace_events);
30 
31 int trace_define_field(struct ftrace_event_call *call, const char *type,
32 		       const char *name, int offset, int size, int is_signed,
33 		       int filter_type)
34 {
35 	struct ftrace_event_field *field;
36 
37 	field = kzalloc(sizeof(*field), GFP_KERNEL);
38 	if (!field)
39 		goto err;
40 
41 	field->name = kstrdup(name, GFP_KERNEL);
42 	if (!field->name)
43 		goto err;
44 
45 	field->type = kstrdup(type, GFP_KERNEL);
46 	if (!field->type)
47 		goto err;
48 
49 	if (filter_type == FILTER_OTHER)
50 		field->filter_type = filter_assign_type(type);
51 	else
52 		field->filter_type = filter_type;
53 
54 	field->offset = offset;
55 	field->size = size;
56 	field->is_signed = is_signed;
57 
58 	list_add(&field->link, &call->fields);
59 
60 	return 0;
61 
62 err:
63 	if (field) {
64 		kfree(field->name);
65 		kfree(field->type);
66 	}
67 	kfree(field);
68 
69 	return -ENOMEM;
70 }
71 EXPORT_SYMBOL_GPL(trace_define_field);
72 
73 #define __common_field(type, item)					\
74 	ret = trace_define_field(call, #type, "common_" #item,		\
75 				 offsetof(typeof(ent), item),		\
76 				 sizeof(ent.item),			\
77 				 is_signed_type(type), FILTER_OTHER);	\
78 	if (ret)							\
79 		return ret;
80 
81 int trace_define_common_fields(struct ftrace_event_call *call)
82 {
83 	int ret;
84 	struct trace_entry ent;
85 
86 	__common_field(unsigned short, type);
87 	__common_field(unsigned char, flags);
88 	__common_field(unsigned char, preempt_count);
89 	__common_field(int, pid);
90 	__common_field(int, lock_depth);
91 
92 	return ret;
93 }
94 EXPORT_SYMBOL_GPL(trace_define_common_fields);
95 
96 #ifdef CONFIG_MODULES
97 
98 static void trace_destroy_fields(struct ftrace_event_call *call)
99 {
100 	struct ftrace_event_field *field, *next;
101 
102 	list_for_each_entry_safe(field, next, &call->fields, link) {
103 		list_del(&field->link);
104 		kfree(field->type);
105 		kfree(field->name);
106 		kfree(field);
107 	}
108 }
109 
110 #endif /* CONFIG_MODULES */
111 
112 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
113 					int enable)
114 {
115 	switch (enable) {
116 	case 0:
117 		if (call->enabled) {
118 			call->enabled = 0;
119 			tracing_stop_cmdline_record();
120 			call->unregfunc(call->data);
121 		}
122 		break;
123 	case 1:
124 		if (!call->enabled) {
125 			call->enabled = 1;
126 			tracing_start_cmdline_record();
127 			call->regfunc(call->data);
128 		}
129 		break;
130 	}
131 }
132 
133 static void ftrace_clear_events(void)
134 {
135 	struct ftrace_event_call *call;
136 
137 	mutex_lock(&event_mutex);
138 	list_for_each_entry(call, &ftrace_events, list) {
139 		ftrace_event_enable_disable(call, 0);
140 	}
141 	mutex_unlock(&event_mutex);
142 }
143 
144 /*
145  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
146  */
147 static int __ftrace_set_clr_event(const char *match, const char *sub,
148 				  const char *event, int set)
149 {
150 	struct ftrace_event_call *call;
151 	int ret = -EINVAL;
152 
153 	mutex_lock(&event_mutex);
154 	list_for_each_entry(call, &ftrace_events, list) {
155 
156 		if (!call->name || !call->regfunc)
157 			continue;
158 
159 		if (match &&
160 		    strcmp(match, call->name) != 0 &&
161 		    strcmp(match, call->system) != 0)
162 			continue;
163 
164 		if (sub && strcmp(sub, call->system) != 0)
165 			continue;
166 
167 		if (event && strcmp(event, call->name) != 0)
168 			continue;
169 
170 		ftrace_event_enable_disable(call, set);
171 
172 		ret = 0;
173 	}
174 	mutex_unlock(&event_mutex);
175 
176 	return ret;
177 }
178 
179 static int ftrace_set_clr_event(char *buf, int set)
180 {
181 	char *event = NULL, *sub = NULL, *match;
182 
183 	/*
184 	 * The buf format can be <subsystem>:<event-name>
185 	 *  *:<event-name> means any event by that name.
186 	 *  :<event-name> is the same.
187 	 *
188 	 *  <subsystem>:* means all events in that subsystem
189 	 *  <subsystem>: means the same.
190 	 *
191 	 *  <name> (no ':') means all events in a subsystem with
192 	 *  the name <name> or any event that matches <name>
193 	 */
194 
195 	match = strsep(&buf, ":");
196 	if (buf) {
197 		sub = match;
198 		event = buf;
199 		match = NULL;
200 
201 		if (!strlen(sub) || strcmp(sub, "*") == 0)
202 			sub = NULL;
203 		if (!strlen(event) || strcmp(event, "*") == 0)
204 			event = NULL;
205 	}
206 
207 	return __ftrace_set_clr_event(match, sub, event, set);
208 }
209 
210 /**
211  * trace_set_clr_event - enable or disable an event
212  * @system: system name to match (NULL for any system)
213  * @event: event name to match (NULL for all events, within system)
214  * @set: 1 to enable, 0 to disable
215  *
216  * This is a way for other parts of the kernel to enable or disable
217  * event recording.
218  *
219  * Returns 0 on success, -EINVAL if the parameters do not match any
220  * registered events.
221  */
222 int trace_set_clr_event(const char *system, const char *event, int set)
223 {
224 	return __ftrace_set_clr_event(NULL, system, event, set);
225 }
226 
227 /* 128 should be much more than enough */
228 #define EVENT_BUF_SIZE		127
229 
230 static ssize_t
231 ftrace_event_write(struct file *file, const char __user *ubuf,
232 		   size_t cnt, loff_t *ppos)
233 {
234 	struct trace_parser parser;
235 	size_t read = 0;
236 	ssize_t ret;
237 
238 	if (!cnt || cnt < 0)
239 		return 0;
240 
241 	ret = tracing_update_buffers();
242 	if (ret < 0)
243 		return ret;
244 
245 	if (trace_parser_get_init(&parser, EVENT_BUF_SIZE + 1))
246 		return -ENOMEM;
247 
248 	read = trace_get_user(&parser, ubuf, cnt, ppos);
249 
250 	if (trace_parser_loaded((&parser))) {
251 		int set = 1;
252 
253 		if (*parser.buffer == '!')
254 			set = 0;
255 
256 		parser.buffer[parser.idx] = 0;
257 
258 		ret = ftrace_set_clr_event(parser.buffer + !set, set);
259 		if (ret)
260 			goto out_put;
261 	}
262 
263 	ret = read;
264 
265  out_put:
266 	trace_parser_put(&parser);
267 
268 	return ret;
269 }
270 
271 static void *
272 t_next(struct seq_file *m, void *v, loff_t *pos)
273 {
274 	struct ftrace_event_call *call = v;
275 
276 	(*pos)++;
277 
278 	list_for_each_entry_continue(call, &ftrace_events, list) {
279 		/*
280 		 * The ftrace subsystem is for showing formats only.
281 		 * They can not be enabled or disabled via the event files.
282 		 */
283 		if (call->regfunc)
284 			return call;
285 	}
286 
287 	return NULL;
288 }
289 
290 static void *t_start(struct seq_file *m, loff_t *pos)
291 {
292 	struct ftrace_event_call *call;
293 	loff_t l;
294 
295 	mutex_lock(&event_mutex);
296 
297 	call = list_entry(&ftrace_events, struct ftrace_event_call, list);
298 	for (l = 0; l <= *pos; ) {
299 		call = t_next(m, call, &l);
300 		if (!call)
301 			break;
302 	}
303 	return call;
304 }
305 
306 static void *
307 s_next(struct seq_file *m, void *v, loff_t *pos)
308 {
309 	struct ftrace_event_call *call = v;
310 
311 	(*pos)++;
312 
313 	list_for_each_entry_continue(call, &ftrace_events, list) {
314 		if (call->enabled)
315 			return call;
316 	}
317 
318 	return NULL;
319 }
320 
321 static void *s_start(struct seq_file *m, loff_t *pos)
322 {
323 	struct ftrace_event_call *call;
324 	loff_t l;
325 
326 	mutex_lock(&event_mutex);
327 
328 	call = list_entry(&ftrace_events, struct ftrace_event_call, list);
329 	for (l = 0; l <= *pos; ) {
330 		call = s_next(m, call, &l);
331 		if (!call)
332 			break;
333 	}
334 	return call;
335 }
336 
337 static int t_show(struct seq_file *m, void *v)
338 {
339 	struct ftrace_event_call *call = v;
340 
341 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
342 		seq_printf(m, "%s:", call->system);
343 	seq_printf(m, "%s\n", call->name);
344 
345 	return 0;
346 }
347 
348 static void t_stop(struct seq_file *m, void *p)
349 {
350 	mutex_unlock(&event_mutex);
351 }
352 
353 static int
354 ftrace_event_seq_open(struct inode *inode, struct file *file)
355 {
356 	const struct seq_operations *seq_ops;
357 
358 	if ((file->f_mode & FMODE_WRITE) &&
359 	    (file->f_flags & O_TRUNC))
360 		ftrace_clear_events();
361 
362 	seq_ops = inode->i_private;
363 	return seq_open(file, seq_ops);
364 }
365 
366 static ssize_t
367 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
368 		  loff_t *ppos)
369 {
370 	struct ftrace_event_call *call = filp->private_data;
371 	char *buf;
372 
373 	if (call->enabled)
374 		buf = "1\n";
375 	else
376 		buf = "0\n";
377 
378 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
379 }
380 
381 static ssize_t
382 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
383 		   loff_t *ppos)
384 {
385 	struct ftrace_event_call *call = filp->private_data;
386 	char buf[64];
387 	unsigned long val;
388 	int ret;
389 
390 	if (cnt >= sizeof(buf))
391 		return -EINVAL;
392 
393 	if (copy_from_user(&buf, ubuf, cnt))
394 		return -EFAULT;
395 
396 	buf[cnt] = 0;
397 
398 	ret = strict_strtoul(buf, 10, &val);
399 	if (ret < 0)
400 		return ret;
401 
402 	ret = tracing_update_buffers();
403 	if (ret < 0)
404 		return ret;
405 
406 	switch (val) {
407 	case 0:
408 	case 1:
409 		mutex_lock(&event_mutex);
410 		ftrace_event_enable_disable(call, val);
411 		mutex_unlock(&event_mutex);
412 		break;
413 
414 	default:
415 		return -EINVAL;
416 	}
417 
418 	*ppos += cnt;
419 
420 	return cnt;
421 }
422 
423 static ssize_t
424 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
425 		   loff_t *ppos)
426 {
427 	const char set_to_char[4] = { '?', '0', '1', 'X' };
428 	const char *system = filp->private_data;
429 	struct ftrace_event_call *call;
430 	char buf[2];
431 	int set = 0;
432 	int ret;
433 
434 	mutex_lock(&event_mutex);
435 	list_for_each_entry(call, &ftrace_events, list) {
436 		if (!call->name || !call->regfunc)
437 			continue;
438 
439 		if (system && strcmp(call->system, system) != 0)
440 			continue;
441 
442 		/*
443 		 * We need to find out if all the events are set
444 		 * or if all events or cleared, or if we have
445 		 * a mixture.
446 		 */
447 		set |= (1 << !!call->enabled);
448 
449 		/*
450 		 * If we have a mixture, no need to look further.
451 		 */
452 		if (set == 3)
453 			break;
454 	}
455 	mutex_unlock(&event_mutex);
456 
457 	buf[0] = set_to_char[set];
458 	buf[1] = '\n';
459 
460 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
461 
462 	return ret;
463 }
464 
465 static ssize_t
466 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
467 		    loff_t *ppos)
468 {
469 	const char *system = filp->private_data;
470 	unsigned long val;
471 	char buf[64];
472 	ssize_t ret;
473 
474 	if (cnt >= sizeof(buf))
475 		return -EINVAL;
476 
477 	if (copy_from_user(&buf, ubuf, cnt))
478 		return -EFAULT;
479 
480 	buf[cnt] = 0;
481 
482 	ret = strict_strtoul(buf, 10, &val);
483 	if (ret < 0)
484 		return ret;
485 
486 	ret = tracing_update_buffers();
487 	if (ret < 0)
488 		return ret;
489 
490 	if (val != 0 && val != 1)
491 		return -EINVAL;
492 
493 	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
494 	if (ret)
495 		goto out;
496 
497 	ret = cnt;
498 
499 out:
500 	*ppos += cnt;
501 
502 	return ret;
503 }
504 
505 extern char *__bad_type_size(void);
506 
507 #undef FIELD
508 #define FIELD(type, name)						\
509 	sizeof(type) != sizeof(field.name) ? __bad_type_size() :	\
510 	#type, "common_" #name, offsetof(typeof(field), name),		\
511 		sizeof(field.name)
512 
513 static int trace_write_header(struct trace_seq *s)
514 {
515 	struct trace_entry field;
516 
517 	/* struct trace_entry */
518 	return trace_seq_printf(s,
519 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
520 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
521 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
522 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
523 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
524 				"\n",
525 				FIELD(unsigned short, type),
526 				FIELD(unsigned char, flags),
527 				FIELD(unsigned char, preempt_count),
528 				FIELD(int, pid),
529 				FIELD(int, lock_depth));
530 }
531 
532 static ssize_t
533 event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
534 		  loff_t *ppos)
535 {
536 	struct ftrace_event_call *call = filp->private_data;
537 	struct trace_seq *s;
538 	char *buf;
539 	int r;
540 
541 	if (*ppos)
542 		return 0;
543 
544 	s = kmalloc(sizeof(*s), GFP_KERNEL);
545 	if (!s)
546 		return -ENOMEM;
547 
548 	trace_seq_init(s);
549 
550 	/* If any of the first writes fail, so will the show_format. */
551 
552 	trace_seq_printf(s, "name: %s\n", call->name);
553 	trace_seq_printf(s, "ID: %d\n", call->id);
554 	trace_seq_printf(s, "format:\n");
555 	trace_write_header(s);
556 
557 	r = call->show_format(call, s);
558 	if (!r) {
559 		/*
560 		 * ug!  The format output is bigger than a PAGE!!
561 		 */
562 		buf = "FORMAT TOO BIG\n";
563 		r = simple_read_from_buffer(ubuf, cnt, ppos,
564 					      buf, strlen(buf));
565 		goto out;
566 	}
567 
568 	r = simple_read_from_buffer(ubuf, cnt, ppos,
569 				    s->buffer, s->len);
570  out:
571 	kfree(s);
572 	return r;
573 }
574 
575 static ssize_t
576 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
577 {
578 	struct ftrace_event_call *call = filp->private_data;
579 	struct trace_seq *s;
580 	int r;
581 
582 	if (*ppos)
583 		return 0;
584 
585 	s = kmalloc(sizeof(*s), GFP_KERNEL);
586 	if (!s)
587 		return -ENOMEM;
588 
589 	trace_seq_init(s);
590 	trace_seq_printf(s, "%d\n", call->id);
591 
592 	r = simple_read_from_buffer(ubuf, cnt, ppos,
593 				    s->buffer, s->len);
594 	kfree(s);
595 	return r;
596 }
597 
598 static ssize_t
599 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
600 		  loff_t *ppos)
601 {
602 	struct ftrace_event_call *call = filp->private_data;
603 	struct trace_seq *s;
604 	int r;
605 
606 	if (*ppos)
607 		return 0;
608 
609 	s = kmalloc(sizeof(*s), GFP_KERNEL);
610 	if (!s)
611 		return -ENOMEM;
612 
613 	trace_seq_init(s);
614 
615 	print_event_filter(call, s);
616 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
617 
618 	kfree(s);
619 
620 	return r;
621 }
622 
623 static ssize_t
624 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
625 		   loff_t *ppos)
626 {
627 	struct ftrace_event_call *call = filp->private_data;
628 	char *buf;
629 	int err;
630 
631 	if (cnt >= PAGE_SIZE)
632 		return -EINVAL;
633 
634 	buf = (char *)__get_free_page(GFP_TEMPORARY);
635 	if (!buf)
636 		return -ENOMEM;
637 
638 	if (copy_from_user(buf, ubuf, cnt)) {
639 		free_page((unsigned long) buf);
640 		return -EFAULT;
641 	}
642 	buf[cnt] = '\0';
643 
644 	err = apply_event_filter(call, buf);
645 	free_page((unsigned long) buf);
646 	if (err < 0)
647 		return err;
648 
649 	*ppos += cnt;
650 
651 	return cnt;
652 }
653 
654 static ssize_t
655 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
656 		      loff_t *ppos)
657 {
658 	struct event_subsystem *system = filp->private_data;
659 	struct trace_seq *s;
660 	int r;
661 
662 	if (*ppos)
663 		return 0;
664 
665 	s = kmalloc(sizeof(*s), GFP_KERNEL);
666 	if (!s)
667 		return -ENOMEM;
668 
669 	trace_seq_init(s);
670 
671 	print_subsystem_event_filter(system, s);
672 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
673 
674 	kfree(s);
675 
676 	return r;
677 }
678 
679 static ssize_t
680 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
681 		       loff_t *ppos)
682 {
683 	struct event_subsystem *system = filp->private_data;
684 	char *buf;
685 	int err;
686 
687 	if (cnt >= PAGE_SIZE)
688 		return -EINVAL;
689 
690 	buf = (char *)__get_free_page(GFP_TEMPORARY);
691 	if (!buf)
692 		return -ENOMEM;
693 
694 	if (copy_from_user(buf, ubuf, cnt)) {
695 		free_page((unsigned long) buf);
696 		return -EFAULT;
697 	}
698 	buf[cnt] = '\0';
699 
700 	err = apply_subsystem_event_filter(system, buf);
701 	free_page((unsigned long) buf);
702 	if (err < 0)
703 		return err;
704 
705 	*ppos += cnt;
706 
707 	return cnt;
708 }
709 
710 static ssize_t
711 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
712 {
713 	int (*func)(struct trace_seq *s) = filp->private_data;
714 	struct trace_seq *s;
715 	int r;
716 
717 	if (*ppos)
718 		return 0;
719 
720 	s = kmalloc(sizeof(*s), GFP_KERNEL);
721 	if (!s)
722 		return -ENOMEM;
723 
724 	trace_seq_init(s);
725 
726 	func(s);
727 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
728 
729 	kfree(s);
730 
731 	return r;
732 }
733 
734 static const struct seq_operations show_event_seq_ops = {
735 	.start = t_start,
736 	.next = t_next,
737 	.show = t_show,
738 	.stop = t_stop,
739 };
740 
741 static const struct seq_operations show_set_event_seq_ops = {
742 	.start = s_start,
743 	.next = s_next,
744 	.show = t_show,
745 	.stop = t_stop,
746 };
747 
748 static const struct file_operations ftrace_avail_fops = {
749 	.open = ftrace_event_seq_open,
750 	.read = seq_read,
751 	.llseek = seq_lseek,
752 	.release = seq_release,
753 };
754 
755 static const struct file_operations ftrace_set_event_fops = {
756 	.open = ftrace_event_seq_open,
757 	.read = seq_read,
758 	.write = ftrace_event_write,
759 	.llseek = seq_lseek,
760 	.release = seq_release,
761 };
762 
763 static const struct file_operations ftrace_enable_fops = {
764 	.open = tracing_open_generic,
765 	.read = event_enable_read,
766 	.write = event_enable_write,
767 };
768 
769 static const struct file_operations ftrace_event_format_fops = {
770 	.open = tracing_open_generic,
771 	.read = event_format_read,
772 };
773 
774 static const struct file_operations ftrace_event_id_fops = {
775 	.open = tracing_open_generic,
776 	.read = event_id_read,
777 };
778 
779 static const struct file_operations ftrace_event_filter_fops = {
780 	.open = tracing_open_generic,
781 	.read = event_filter_read,
782 	.write = event_filter_write,
783 };
784 
785 static const struct file_operations ftrace_subsystem_filter_fops = {
786 	.open = tracing_open_generic,
787 	.read = subsystem_filter_read,
788 	.write = subsystem_filter_write,
789 };
790 
791 static const struct file_operations ftrace_system_enable_fops = {
792 	.open = tracing_open_generic,
793 	.read = system_enable_read,
794 	.write = system_enable_write,
795 };
796 
797 static const struct file_operations ftrace_show_header_fops = {
798 	.open = tracing_open_generic,
799 	.read = show_header,
800 };
801 
802 static struct dentry *event_trace_events_dir(void)
803 {
804 	static struct dentry *d_tracer;
805 	static struct dentry *d_events;
806 
807 	if (d_events)
808 		return d_events;
809 
810 	d_tracer = tracing_init_dentry();
811 	if (!d_tracer)
812 		return NULL;
813 
814 	d_events = debugfs_create_dir("events", d_tracer);
815 	if (!d_events)
816 		pr_warning("Could not create debugfs "
817 			   "'events' directory\n");
818 
819 	return d_events;
820 }
821 
822 static LIST_HEAD(event_subsystems);
823 
824 static struct dentry *
825 event_subsystem_dir(const char *name, struct dentry *d_events)
826 {
827 	struct event_subsystem *system;
828 	struct dentry *entry;
829 
830 	/* First see if we did not already create this dir */
831 	list_for_each_entry(system, &event_subsystems, list) {
832 		if (strcmp(system->name, name) == 0) {
833 			system->nr_events++;
834 			return system->entry;
835 		}
836 	}
837 
838 	/* need to create new entry */
839 	system = kmalloc(sizeof(*system), GFP_KERNEL);
840 	if (!system) {
841 		pr_warning("No memory to create event subsystem %s\n",
842 			   name);
843 		return d_events;
844 	}
845 
846 	system->entry = debugfs_create_dir(name, d_events);
847 	if (!system->entry) {
848 		pr_warning("Could not create event subsystem %s\n",
849 			   name);
850 		kfree(system);
851 		return d_events;
852 	}
853 
854 	system->nr_events = 1;
855 	system->name = kstrdup(name, GFP_KERNEL);
856 	if (!system->name) {
857 		debugfs_remove(system->entry);
858 		kfree(system);
859 		return d_events;
860 	}
861 
862 	list_add(&system->list, &event_subsystems);
863 
864 	system->filter = NULL;
865 
866 	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
867 	if (!system->filter) {
868 		pr_warning("Could not allocate filter for subsystem "
869 			   "'%s'\n", name);
870 		return system->entry;
871 	}
872 
873 	entry = debugfs_create_file("filter", 0644, system->entry, system,
874 				    &ftrace_subsystem_filter_fops);
875 	if (!entry) {
876 		kfree(system->filter);
877 		system->filter = NULL;
878 		pr_warning("Could not create debugfs "
879 			   "'%s/filter' entry\n", name);
880 	}
881 
882 	entry = trace_create_file("enable", 0644, system->entry,
883 				  (void *)system->name,
884 				  &ftrace_system_enable_fops);
885 
886 	return system->entry;
887 }
888 
889 static int
890 event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
891 		 const struct file_operations *id,
892 		 const struct file_operations *enable,
893 		 const struct file_operations *filter,
894 		 const struct file_operations *format)
895 {
896 	struct dentry *entry;
897 	int ret;
898 
899 	/*
900 	 * If the trace point header did not define TRACE_SYSTEM
901 	 * then the system would be called "TRACE_SYSTEM".
902 	 */
903 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
904 		d_events = event_subsystem_dir(call->system, d_events);
905 
906 	call->dir = debugfs_create_dir(call->name, d_events);
907 	if (!call->dir) {
908 		pr_warning("Could not create debugfs "
909 			   "'%s' directory\n", call->name);
910 		return -1;
911 	}
912 
913 	if (call->regfunc)
914 		entry = trace_create_file("enable", 0644, call->dir, call,
915 					  enable);
916 
917 	if (call->id && call->profile_enable)
918 		entry = trace_create_file("id", 0444, call->dir, call,
919 					  id);
920 
921 	if (call->define_fields) {
922 		ret = call->define_fields(call);
923 		if (ret < 0) {
924 			pr_warning("Could not initialize trace point"
925 				   " events/%s\n", call->name);
926 			return ret;
927 		}
928 		entry = trace_create_file("filter", 0644, call->dir, call,
929 					  filter);
930 	}
931 
932 	/* A trace may not want to export its format */
933 	if (!call->show_format)
934 		return 0;
935 
936 	entry = trace_create_file("format", 0444, call->dir, call,
937 				  format);
938 
939 	return 0;
940 }
941 
/*
 * Walk the array of events [start, end).  The bounds are compared as
 * unsigned long addresses.
 */
#define for_each_event(event, start, end)			\
	for ((event) = (start);					\
	     (unsigned long)(event) < (unsigned long)(end);	\
	     (event)++)
946 
947 #ifdef CONFIG_MODULES
948 
949 static LIST_HEAD(ftrace_module_file_list);
950 
951 /*
952  * Modules must own their file_operations to keep up with
953  * reference counting.
954  */
955 struct ftrace_module_file_ops {
956 	struct list_head		list;
957 	struct module			*mod;
958 	struct file_operations		id;
959 	struct file_operations		enable;
960 	struct file_operations		format;
961 	struct file_operations		filter;
962 };
963 
964 static void remove_subsystem_dir(const char *name)
965 {
966 	struct event_subsystem *system;
967 
968 	if (strcmp(name, TRACE_SYSTEM) == 0)
969 		return;
970 
971 	list_for_each_entry(system, &event_subsystems, list) {
972 		if (strcmp(system->name, name) == 0) {
973 			if (!--system->nr_events) {
974 				struct event_filter *filter = system->filter;
975 
976 				debugfs_remove_recursive(system->entry);
977 				list_del(&system->list);
978 				if (filter) {
979 					kfree(filter->filter_string);
980 					kfree(filter);
981 				}
982 				kfree(system->name);
983 				kfree(system);
984 			}
985 			break;
986 		}
987 	}
988 }
989 
990 static struct ftrace_module_file_ops *
991 trace_create_file_ops(struct module *mod)
992 {
993 	struct ftrace_module_file_ops *file_ops;
994 
995 	/*
996 	 * This is a bit of a PITA. To allow for correct reference
997 	 * counting, modules must "own" their file_operations.
998 	 * To do this, we allocate the file operations that will be
999 	 * used in the event directory.
1000 	 */
1001 
1002 	file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
1003 	if (!file_ops)
1004 		return NULL;
1005 
1006 	file_ops->mod = mod;
1007 
1008 	file_ops->id = ftrace_event_id_fops;
1009 	file_ops->id.owner = mod;
1010 
1011 	file_ops->enable = ftrace_enable_fops;
1012 	file_ops->enable.owner = mod;
1013 
1014 	file_ops->filter = ftrace_event_filter_fops;
1015 	file_ops->filter.owner = mod;
1016 
1017 	file_ops->format = ftrace_event_format_fops;
1018 	file_ops->format.owner = mod;
1019 
1020 	list_add(&file_ops->list, &ftrace_module_file_list);
1021 
1022 	return file_ops;
1023 }
1024 
1025 static void trace_module_add_events(struct module *mod)
1026 {
1027 	struct ftrace_module_file_ops *file_ops = NULL;
1028 	struct ftrace_event_call *call, *start, *end;
1029 	struct dentry *d_events;
1030 	int ret;
1031 
1032 	start = mod->trace_events;
1033 	end = mod->trace_events + mod->num_trace_events;
1034 
1035 	if (start == end)
1036 		return;
1037 
1038 	d_events = event_trace_events_dir();
1039 	if (!d_events)
1040 		return;
1041 
1042 	for_each_event(call, start, end) {
1043 		/* The linker may leave blanks */
1044 		if (!call->name)
1045 			continue;
1046 		if (call->raw_init) {
1047 			ret = call->raw_init();
1048 			if (ret < 0) {
1049 				if (ret != -ENOSYS)
1050 					pr_warning("Could not initialize trace "
1051 					"point events/%s\n", call->name);
1052 				continue;
1053 			}
1054 		}
1055 		/*
1056 		 * This module has events, create file ops for this module
1057 		 * if not already done.
1058 		 */
1059 		if (!file_ops) {
1060 			file_ops = trace_create_file_ops(mod);
1061 			if (!file_ops)
1062 				return;
1063 		}
1064 		call->mod = mod;
1065 		list_add(&call->list, &ftrace_events);
1066 		event_create_dir(call, d_events,
1067 				 &file_ops->id, &file_ops->enable,
1068 				 &file_ops->filter, &file_ops->format);
1069 	}
1070 }
1071 
1072 static void trace_module_remove_events(struct module *mod)
1073 {
1074 	struct ftrace_module_file_ops *file_ops;
1075 	struct ftrace_event_call *call, *p;
1076 	bool found = false;
1077 
1078 	down_write(&trace_event_mutex);
1079 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
1080 		if (call->mod == mod) {
1081 			found = true;
1082 			ftrace_event_enable_disable(call, 0);
1083 			if (call->event)
1084 				__unregister_ftrace_event(call->event);
1085 			debugfs_remove_recursive(call->dir);
1086 			list_del(&call->list);
1087 			trace_destroy_fields(call);
1088 			destroy_preds(call);
1089 			remove_subsystem_dir(call->system);
1090 		}
1091 	}
1092 
1093 	/* Now free the file_operations */
1094 	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
1095 		if (file_ops->mod == mod)
1096 			break;
1097 	}
1098 	if (&file_ops->list != &ftrace_module_file_list) {
1099 		list_del(&file_ops->list);
1100 		kfree(file_ops);
1101 	}
1102 
1103 	/*
1104 	 * It is safest to reset the ring buffer if the module being unloaded
1105 	 * registered any events.
1106 	 */
1107 	if (found)
1108 		tracing_reset_current_online_cpus();
1109 	up_write(&trace_event_mutex);
1110 }
1111 
1112 static int trace_module_notify(struct notifier_block *self,
1113 			       unsigned long val, void *data)
1114 {
1115 	struct module *mod = data;
1116 
1117 	mutex_lock(&event_mutex);
1118 	switch (val) {
1119 	case MODULE_STATE_COMING:
1120 		trace_module_add_events(mod);
1121 		break;
1122 	case MODULE_STATE_GOING:
1123 		trace_module_remove_events(mod);
1124 		break;
1125 	}
1126 	mutex_unlock(&event_mutex);
1127 
1128 	return 0;
1129 }
1130 #else
/* Without CONFIG_MODULES there is nothing to do: just return 0. */
static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	return 0;
}
1136 #endif /* CONFIG_MODULES */
1137 
1138 static struct notifier_block trace_module_nb = {
1139 	.notifier_call = trace_module_notify,
1140 	.priority = 0,
1141 };
1142 
1143 extern struct ftrace_event_call __start_ftrace_events[];
1144 extern struct ftrace_event_call __stop_ftrace_events[];
1145 
1146 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1147 
1148 static __init int setup_trace_event(char *str)
1149 {
1150 	strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
1151 	ring_buffer_expanded = 1;
1152 	tracing_selftest_disabled = 1;
1153 
1154 	return 1;
1155 }
1156 __setup("trace_event=", setup_trace_event);
1157 
1158 static __init int event_trace_init(void)
1159 {
1160 	struct ftrace_event_call *call;
1161 	struct dentry *d_tracer;
1162 	struct dentry *entry;
1163 	struct dentry *d_events;
1164 	int ret;
1165 	char *buf = bootup_event_buf;
1166 	char *token;
1167 
1168 	d_tracer = tracing_init_dentry();
1169 	if (!d_tracer)
1170 		return 0;
1171 
1172 	entry = debugfs_create_file("available_events", 0444, d_tracer,
1173 				    (void *)&show_event_seq_ops,
1174 				    &ftrace_avail_fops);
1175 	if (!entry)
1176 		pr_warning("Could not create debugfs "
1177 			   "'available_events' entry\n");
1178 
1179 	entry = debugfs_create_file("set_event", 0644, d_tracer,
1180 				    (void *)&show_set_event_seq_ops,
1181 				    &ftrace_set_event_fops);
1182 	if (!entry)
1183 		pr_warning("Could not create debugfs "
1184 			   "'set_event' entry\n");
1185 
1186 	d_events = event_trace_events_dir();
1187 	if (!d_events)
1188 		return 0;
1189 
1190 	/* ring buffer internal formats */
1191 	trace_create_file("header_page", 0444, d_events,
1192 			  ring_buffer_print_page_header,
1193 			  &ftrace_show_header_fops);
1194 
1195 	trace_create_file("header_event", 0444, d_events,
1196 			  ring_buffer_print_entry_header,
1197 			  &ftrace_show_header_fops);
1198 
1199 	trace_create_file("enable", 0644, d_events,
1200 			  NULL, &ftrace_system_enable_fops);
1201 
1202 	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1203 		/* The linker may leave blanks */
1204 		if (!call->name)
1205 			continue;
1206 		if (call->raw_init) {
1207 			ret = call->raw_init();
1208 			if (ret < 0) {
1209 				if (ret != -ENOSYS)
1210 					pr_warning("Could not initialize trace "
1211 					"point events/%s\n", call->name);
1212 				continue;
1213 			}
1214 		}
1215 		list_add(&call->list, &ftrace_events);
1216 		event_create_dir(call, d_events, &ftrace_event_id_fops,
1217 				 &ftrace_enable_fops, &ftrace_event_filter_fops,
1218 				 &ftrace_event_format_fops);
1219 	}
1220 
1221 	while (true) {
1222 		token = strsep(&buf, ",");
1223 
1224 		if (!token)
1225 			break;
1226 		if (!*token)
1227 			continue;
1228 
1229 		ret = ftrace_set_clr_event(token, 1);
1230 		if (ret)
1231 			pr_warning("Failed to enable trace event: %s\n", token);
1232 	}
1233 
1234 	ret = register_module_notifier(&trace_module_nb);
1235 	if (ret)
1236 		pr_warning("Failed to register trace events module notifier\n");
1237 
1238 	return 0;
1239 }
1240 fs_initcall(event_trace_init);
1241 
1242 #ifdef CONFIG_FTRACE_STARTUP_TEST
1243 
1244 static DEFINE_SPINLOCK(test_spinlock);
1245 static DEFINE_SPINLOCK(test_spinlock_irq);
1246 static DEFINE_MUTEX(test_mutex);
1247 
1248 static __init void test_work(struct work_struct *dummy)
1249 {
1250 	spin_lock(&test_spinlock);
1251 	spin_lock_irq(&test_spinlock_irq);
1252 	udelay(1);
1253 	spin_unlock_irq(&test_spinlock_irq);
1254 	spin_unlock(&test_spinlock);
1255 
1256 	mutex_lock(&test_mutex);
1257 	msleep(1);
1258 	mutex_unlock(&test_mutex);
1259 }
1260 
1261 static __init int event_test_thread(void *unused)
1262 {
1263 	void *test_malloc;
1264 
1265 	test_malloc = kmalloc(1234, GFP_KERNEL);
1266 	if (!test_malloc)
1267 		pr_info("failed to kmalloc\n");
1268 
1269 	schedule_on_each_cpu(test_work);
1270 
1271 	kfree(test_malloc);
1272 
1273 	set_current_state(TASK_INTERRUPTIBLE);
1274 	while (!kthread_should_stop())
1275 		schedule();
1276 
1277 	return 0;
1278 }
1279 
1280 /*
1281  * Do various things that may trigger events.
1282  */
1283 static __init void event_test_stuff(void)
1284 {
1285 	struct task_struct *test_thread;
1286 
1287 	test_thread = kthread_run(event_test_thread, NULL, "test-events");
1288 	msleep(1);
1289 	kthread_stop(test_thread);
1290 }
1291 
1292 /*
1293  * For every trace event defined, we will test each trace point separately,
1294  * and then by groups, and finally all trace points.
1295  */
1296 static __init void event_trace_self_tests(void)
1297 {
1298 	struct ftrace_event_call *call;
1299 	struct event_subsystem *system;
1300 	int ret;
1301 
1302 	pr_info("Running tests on trace events:\n");
1303 
1304 	list_for_each_entry(call, &ftrace_events, list) {
1305 
1306 		/* Only test those that have a regfunc */
1307 		if (!call->regfunc)
1308 			continue;
1309 
1310 /*
1311  * Testing syscall events here is pretty useless, but
1312  * we still do it if configured. But this is time consuming.
1313  * What we really need is a user thread to perform the
1314  * syscalls as we test.
1315  */
1316 #ifndef CONFIG_EVENT_TRACE_TEST_SYSCALLS
1317 		if (call->system &&
1318 		    strcmp(call->system, "syscalls") == 0)
1319 			continue;
1320 #endif
1321 
1322 		pr_info("Testing event %s: ", call->name);
1323 
1324 		/*
1325 		 * If an event is already enabled, someone is using
1326 		 * it and the self test should not be on.
1327 		 */
1328 		if (call->enabled) {
1329 			pr_warning("Enabled event during self test!\n");
1330 			WARN_ON_ONCE(1);
1331 			continue;
1332 		}
1333 
1334 		ftrace_event_enable_disable(call, 1);
1335 		event_test_stuff();
1336 		ftrace_event_enable_disable(call, 0);
1337 
1338 		pr_cont("OK\n");
1339 	}
1340 
1341 	/* Now test at the sub system level */
1342 
1343 	pr_info("Running tests on trace event systems:\n");
1344 
1345 	list_for_each_entry(system, &event_subsystems, list) {
1346 
1347 		/* the ftrace system is special, skip it */
1348 		if (strcmp(system->name, "ftrace") == 0)
1349 			continue;
1350 
1351 		pr_info("Testing event system %s: ", system->name);
1352 
1353 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
1354 		if (WARN_ON_ONCE(ret)) {
1355 			pr_warning("error enabling system %s\n",
1356 				   system->name);
1357 			continue;
1358 		}
1359 
1360 		event_test_stuff();
1361 
1362 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
1363 		if (WARN_ON_ONCE(ret))
1364 			pr_warning("error disabling system %s\n",
1365 				   system->name);
1366 
1367 		pr_cont("OK\n");
1368 	}
1369 
1370 	/* Test with all events enabled */
1371 
1372 	pr_info("Running tests on all trace events:\n");
1373 	pr_info("Testing all events: ");
1374 
1375 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
1376 	if (WARN_ON_ONCE(ret)) {
1377 		pr_warning("error enabling all events\n");
1378 		return;
1379 	}
1380 
1381 	event_test_stuff();
1382 
1383 	/* reset sysname */
1384 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
1385 	if (WARN_ON_ONCE(ret)) {
1386 		pr_warning("error disabling all events\n");
1387 		return;
1388 	}
1389 
1390 	pr_cont("OK\n");
1391 }
1392 
1393 #ifdef CONFIG_FUNCTION_TRACER
1394 
1395 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
1396 
1397 static void
1398 function_test_events_call(unsigned long ip, unsigned long parent_ip)
1399 {
1400 	struct ring_buffer_event *event;
1401 	struct ring_buffer *buffer;
1402 	struct ftrace_entry *entry;
1403 	unsigned long flags;
1404 	long disabled;
1405 	int resched;
1406 	int cpu;
1407 	int pc;
1408 
1409 	pc = preempt_count();
1410 	resched = ftrace_preempt_disable();
1411 	cpu = raw_smp_processor_id();
1412 	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1413 
1414 	if (disabled != 1)
1415 		goto out;
1416 
1417 	local_save_flags(flags);
1418 
1419 	event = trace_current_buffer_lock_reserve(&buffer,
1420 						  TRACE_FN, sizeof(*entry),
1421 						  flags, pc);
1422 	if (!event)
1423 		goto out;
1424 	entry	= ring_buffer_event_data(event);
1425 	entry->ip			= ip;
1426 	entry->parent_ip		= parent_ip;
1427 
1428 	trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
1429 
1430  out:
1431 	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1432 	ftrace_preempt_enable(resched);
1433 }
1434 
1435 static struct ftrace_ops trace_ops __initdata  =
1436 {
1437 	.func = function_test_events_call,
1438 };
1439 
1440 static __init void event_trace_self_test_with_function(void)
1441 {
1442 	register_ftrace_function(&trace_ops);
1443 	pr_info("Running tests again, along with the function tracer\n");
1444 	event_trace_self_tests();
1445 	unregister_ftrace_function(&trace_ops);
1446 }
1447 #else
1448 static __init void event_trace_self_test_with_function(void)
1449 {
1450 }
1451 #endif
1452 
1453 static __init int event_trace_self_tests_init(void)
1454 {
1455 	if (!tracing_selftest_disabled) {
1456 		event_trace_self_tests();
1457 		event_trace_self_test_with_function();
1458 	}
1459 
1460 	return 0;
1461 }
1462 
1463 late_initcall(event_trace_self_tests_init);
1464 
1465 #endif
1466