xref: /linux/kernel/trace/trace_events.c (revision a0b54e256d513ed99e456bea6e4e188ff92e7c46)
1 /*
2  * event tracer
3  *
4  * Copyright (C) 2008 Red Hat Inc, Steven Rostedt <srostedt@redhat.com>
5  *
6  *  - Added format output of fields of the trace point.
7  *    This was based off of work by Tom Zanussi <tzanussi@gmail.com>.
8  *
9  */
10 
11 #include <linux/workqueue.h>
12 #include <linux/spinlock.h>
13 #include <linux/kthread.h>
14 #include <linux/debugfs.h>
15 #include <linux/uaccess.h>
16 #include <linux/module.h>
17 #include <linux/ctype.h>
18 #include <linux/delay.h>
19 
20 #include <asm/setup.h>
21 
22 #include "trace_output.h"
23 
/*
 * System name carried by events whose header never defined TRACE_SYSTEM;
 * such events are not placed in a subsystem directory.
 */
#define TRACE_SYSTEM "TRACE_SYSTEM"

/* Every event known to this file is linked here through ->list. */
LIST_HEAD(ftrace_events);

/* Held around walks and updates of ftrace_events in this file. */
DEFINE_MUTEX(event_mutex);
29 
30 int trace_define_field(struct ftrace_event_call *call, const char *type,
31 		       const char *name, int offset, int size, int is_signed,
32 		       int filter_type)
33 {
34 	struct ftrace_event_field *field;
35 
36 	field = kzalloc(sizeof(*field), GFP_KERNEL);
37 	if (!field)
38 		goto err;
39 
40 	field->name = kstrdup(name, GFP_KERNEL);
41 	if (!field->name)
42 		goto err;
43 
44 	field->type = kstrdup(type, GFP_KERNEL);
45 	if (!field->type)
46 		goto err;
47 
48 	if (filter_type == FILTER_OTHER)
49 		field->filter_type = filter_assign_type(type);
50 	else
51 		field->filter_type = filter_type;
52 
53 	field->offset = offset;
54 	field->size = size;
55 	field->is_signed = is_signed;
56 
57 	list_add(&field->link, &call->fields);
58 
59 	return 0;
60 
61 err:
62 	if (field) {
63 		kfree(field->name);
64 		kfree(field->type);
65 	}
66 	kfree(field);
67 
68 	return -ENOMEM;
69 }
70 EXPORT_SYMBOL_GPL(trace_define_field);
71 
72 #define __common_field(type, item)					\
73 	ret = trace_define_field(call, #type, "common_" #item,		\
74 				 offsetof(typeof(ent), item),		\
75 				 sizeof(ent.item),			\
76 				 is_signed_type(type), FILTER_OTHER);	\
77 	if (ret)							\
78 		return ret;
79 
80 int trace_define_common_fields(struct ftrace_event_call *call)
81 {
82 	int ret;
83 	struct trace_entry ent;
84 
85 	__common_field(unsigned short, type);
86 	__common_field(unsigned char, flags);
87 	__common_field(unsigned char, preempt_count);
88 	__common_field(int, pid);
89 	__common_field(int, tgid);
90 
91 	return ret;
92 }
93 EXPORT_SYMBOL_GPL(trace_define_common_fields);
94 
95 #ifdef CONFIG_MODULES
96 
97 static void trace_destroy_fields(struct ftrace_event_call *call)
98 {
99 	struct ftrace_event_field *field, *next;
100 
101 	list_for_each_entry_safe(field, next, &call->fields, link) {
102 		list_del(&field->link);
103 		kfree(field->type);
104 		kfree(field->name);
105 		kfree(field);
106 	}
107 }
108 
109 #endif /* CONFIG_MODULES */
110 
111 static void ftrace_event_enable_disable(struct ftrace_event_call *call,
112 					int enable)
113 {
114 	switch (enable) {
115 	case 0:
116 		if (call->enabled) {
117 			call->enabled = 0;
118 			tracing_stop_cmdline_record();
119 			call->unregfunc(call->data);
120 		}
121 		break;
122 	case 1:
123 		if (!call->enabled) {
124 			call->enabled = 1;
125 			tracing_start_cmdline_record();
126 			call->regfunc(call->data);
127 		}
128 		break;
129 	}
130 }
131 
132 static void ftrace_clear_events(void)
133 {
134 	struct ftrace_event_call *call;
135 
136 	mutex_lock(&event_mutex);
137 	list_for_each_entry(call, &ftrace_events, list) {
138 		ftrace_event_enable_disable(call, 0);
139 	}
140 	mutex_unlock(&event_mutex);
141 }
142 
143 /*
144  * __ftrace_set_clr_event(NULL, NULL, NULL, set) will set/unset all events.
145  */
146 static int __ftrace_set_clr_event(const char *match, const char *sub,
147 				  const char *event, int set)
148 {
149 	struct ftrace_event_call *call;
150 	int ret = -EINVAL;
151 
152 	mutex_lock(&event_mutex);
153 	list_for_each_entry(call, &ftrace_events, list) {
154 
155 		if (!call->name || !call->regfunc)
156 			continue;
157 
158 		if (match &&
159 		    strcmp(match, call->name) != 0 &&
160 		    strcmp(match, call->system) != 0)
161 			continue;
162 
163 		if (sub && strcmp(sub, call->system) != 0)
164 			continue;
165 
166 		if (event && strcmp(event, call->name) != 0)
167 			continue;
168 
169 		ftrace_event_enable_disable(call, set);
170 
171 		ret = 0;
172 	}
173 	mutex_unlock(&event_mutex);
174 
175 	return ret;
176 }
177 
178 static int ftrace_set_clr_event(char *buf, int set)
179 {
180 	char *event = NULL, *sub = NULL, *match;
181 
182 	/*
183 	 * The buf format can be <subsystem>:<event-name>
184 	 *  *:<event-name> means any event by that name.
185 	 *  :<event-name> is the same.
186 	 *
187 	 *  <subsystem>:* means all events in that subsystem
188 	 *  <subsystem>: means the same.
189 	 *
190 	 *  <name> (no ':') means all events in a subsystem with
191 	 *  the name <name> or any event that matches <name>
192 	 */
193 
194 	match = strsep(&buf, ":");
195 	if (buf) {
196 		sub = match;
197 		event = buf;
198 		match = NULL;
199 
200 		if (!strlen(sub) || strcmp(sub, "*") == 0)
201 			sub = NULL;
202 		if (!strlen(event) || strcmp(event, "*") == 0)
203 			event = NULL;
204 	}
205 
206 	return __ftrace_set_clr_event(match, sub, event, set);
207 }
208 
209 /**
210  * trace_set_clr_event - enable or disable an event
211  * @system: system name to match (NULL for any system)
212  * @event: event name to match (NULL for all events, within system)
213  * @set: 1 to enable, 0 to disable
214  *
215  * This is a way for other parts of the kernel to enable or disable
216  * event recording.
217  *
218  * Returns 0 on success, -EINVAL if the parameters do not match any
219  * registered events.
220  */
221 int trace_set_clr_event(const char *system, const char *event, int set)
222 {
223 	return __ftrace_set_clr_event(NULL, system, event, set);
224 }
225 
226 /* 128 should be much more than enough */
227 #define EVENT_BUF_SIZE		127
228 
229 static ssize_t
230 ftrace_event_write(struct file *file, const char __user *ubuf,
231 		   size_t cnt, loff_t *ppos)
232 {
233 	size_t read = 0;
234 	int i, set = 1;
235 	ssize_t ret;
236 	char *buf;
237 	char ch;
238 
239 	if (!cnt || cnt < 0)
240 		return 0;
241 
242 	ret = tracing_update_buffers();
243 	if (ret < 0)
244 		return ret;
245 
246 	ret = get_user(ch, ubuf++);
247 	if (ret)
248 		return ret;
249 	read++;
250 	cnt--;
251 
252 	/* skip white space */
253 	while (cnt && isspace(ch)) {
254 		ret = get_user(ch, ubuf++);
255 		if (ret)
256 			return ret;
257 		read++;
258 		cnt--;
259 	}
260 
261 	/* Only white space found? */
262 	if (isspace(ch)) {
263 		file->f_pos += read;
264 		ret = read;
265 		return ret;
266 	}
267 
268 	buf = kmalloc(EVENT_BUF_SIZE+1, GFP_KERNEL);
269 	if (!buf)
270 		return -ENOMEM;
271 
272 	if (cnt > EVENT_BUF_SIZE)
273 		cnt = EVENT_BUF_SIZE;
274 
275 	i = 0;
276 	while (cnt && !isspace(ch)) {
277 		if (!i && ch == '!')
278 			set = 0;
279 		else
280 			buf[i++] = ch;
281 
282 		ret = get_user(ch, ubuf++);
283 		if (ret)
284 			goto out_free;
285 		read++;
286 		cnt--;
287 	}
288 	buf[i] = 0;
289 
290 	file->f_pos += read;
291 
292 	ret = ftrace_set_clr_event(buf, set);
293 	if (ret)
294 		goto out_free;
295 
296 	ret = read;
297 
298  out_free:
299 	kfree(buf);
300 
301 	return ret;
302 }
303 
304 static void *
305 t_next(struct seq_file *m, void *v, loff_t *pos)
306 {
307 	struct list_head *list = m->private;
308 	struct ftrace_event_call *call;
309 
310 	(*pos)++;
311 
312 	for (;;) {
313 		if (list == &ftrace_events)
314 			return NULL;
315 
316 		call = list_entry(list, struct ftrace_event_call, list);
317 
318 		/*
319 		 * The ftrace subsystem is for showing formats only.
320 		 * They can not be enabled or disabled via the event files.
321 		 */
322 		if (call->regfunc)
323 			break;
324 
325 		list = list->next;
326 	}
327 
328 	m->private = list->next;
329 
330 	return call;
331 }
332 
333 static void *t_start(struct seq_file *m, loff_t *pos)
334 {
335 	struct ftrace_event_call *call = NULL;
336 	loff_t l;
337 
338 	mutex_lock(&event_mutex);
339 
340 	m->private = ftrace_events.next;
341 	for (l = 0; l <= *pos; ) {
342 		call = t_next(m, NULL, &l);
343 		if (!call)
344 			break;
345 	}
346 	return call;
347 }
348 
349 static void *
350 s_next(struct seq_file *m, void *v, loff_t *pos)
351 {
352 	struct list_head *list = m->private;
353 	struct ftrace_event_call *call;
354 
355 	(*pos)++;
356 
357  retry:
358 	if (list == &ftrace_events)
359 		return NULL;
360 
361 	call = list_entry(list, struct ftrace_event_call, list);
362 
363 	if (!call->enabled) {
364 		list = list->next;
365 		goto retry;
366 	}
367 
368 	m->private = list->next;
369 
370 	return call;
371 }
372 
373 static void *s_start(struct seq_file *m, loff_t *pos)
374 {
375 	struct ftrace_event_call *call = NULL;
376 	loff_t l;
377 
378 	mutex_lock(&event_mutex);
379 
380 	m->private = ftrace_events.next;
381 	for (l = 0; l <= *pos; ) {
382 		call = s_next(m, NULL, &l);
383 		if (!call)
384 			break;
385 	}
386 	return call;
387 }
388 
389 static int t_show(struct seq_file *m, void *v)
390 {
391 	struct ftrace_event_call *call = v;
392 
393 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
394 		seq_printf(m, "%s:", call->system);
395 	seq_printf(m, "%s\n", call->name);
396 
397 	return 0;
398 }
399 
400 static void t_stop(struct seq_file *m, void *p)
401 {
402 	mutex_unlock(&event_mutex);
403 }
404 
405 static int
406 ftrace_event_seq_open(struct inode *inode, struct file *file)
407 {
408 	const struct seq_operations *seq_ops;
409 
410 	if ((file->f_mode & FMODE_WRITE) &&
411 	    (file->f_flags & O_TRUNC))
412 		ftrace_clear_events();
413 
414 	seq_ops = inode->i_private;
415 	return seq_open(file, seq_ops);
416 }
417 
418 static ssize_t
419 event_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
420 		  loff_t *ppos)
421 {
422 	struct ftrace_event_call *call = filp->private_data;
423 	char *buf;
424 
425 	if (call->enabled)
426 		buf = "1\n";
427 	else
428 		buf = "0\n";
429 
430 	return simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
431 }
432 
433 static ssize_t
434 event_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
435 		   loff_t *ppos)
436 {
437 	struct ftrace_event_call *call = filp->private_data;
438 	char buf[64];
439 	unsigned long val;
440 	int ret;
441 
442 	if (cnt >= sizeof(buf))
443 		return -EINVAL;
444 
445 	if (copy_from_user(&buf, ubuf, cnt))
446 		return -EFAULT;
447 
448 	buf[cnt] = 0;
449 
450 	ret = strict_strtoul(buf, 10, &val);
451 	if (ret < 0)
452 		return ret;
453 
454 	ret = tracing_update_buffers();
455 	if (ret < 0)
456 		return ret;
457 
458 	switch (val) {
459 	case 0:
460 	case 1:
461 		mutex_lock(&event_mutex);
462 		ftrace_event_enable_disable(call, val);
463 		mutex_unlock(&event_mutex);
464 		break;
465 
466 	default:
467 		return -EINVAL;
468 	}
469 
470 	*ppos += cnt;
471 
472 	return cnt;
473 }
474 
475 static ssize_t
476 system_enable_read(struct file *filp, char __user *ubuf, size_t cnt,
477 		   loff_t *ppos)
478 {
479 	const char set_to_char[4] = { '?', '0', '1', 'X' };
480 	const char *system = filp->private_data;
481 	struct ftrace_event_call *call;
482 	char buf[2];
483 	int set = 0;
484 	int ret;
485 
486 	mutex_lock(&event_mutex);
487 	list_for_each_entry(call, &ftrace_events, list) {
488 		if (!call->name || !call->regfunc)
489 			continue;
490 
491 		if (system && strcmp(call->system, system) != 0)
492 			continue;
493 
494 		/*
495 		 * We need to find out if all the events are set
496 		 * or if all events or cleared, or if we have
497 		 * a mixture.
498 		 */
499 		set |= (1 << !!call->enabled);
500 
501 		/*
502 		 * If we have a mixture, no need to look further.
503 		 */
504 		if (set == 3)
505 			break;
506 	}
507 	mutex_unlock(&event_mutex);
508 
509 	buf[0] = set_to_char[set];
510 	buf[1] = '\n';
511 
512 	ret = simple_read_from_buffer(ubuf, cnt, ppos, buf, 2);
513 
514 	return ret;
515 }
516 
517 static ssize_t
518 system_enable_write(struct file *filp, const char __user *ubuf, size_t cnt,
519 		    loff_t *ppos)
520 {
521 	const char *system = filp->private_data;
522 	unsigned long val;
523 	char buf[64];
524 	ssize_t ret;
525 
526 	if (cnt >= sizeof(buf))
527 		return -EINVAL;
528 
529 	if (copy_from_user(&buf, ubuf, cnt))
530 		return -EFAULT;
531 
532 	buf[cnt] = 0;
533 
534 	ret = strict_strtoul(buf, 10, &val);
535 	if (ret < 0)
536 		return ret;
537 
538 	ret = tracing_update_buffers();
539 	if (ret < 0)
540 		return ret;
541 
542 	if (val != 0 && val != 1)
543 		return -EINVAL;
544 
545 	ret = __ftrace_set_clr_event(NULL, system, NULL, val);
546 	if (ret)
547 		goto out;
548 
549 	ret = cnt;
550 
551 out:
552 	*ppos += cnt;
553 
554 	return ret;
555 }
556 
557 extern char *__bad_type_size(void);
558 
559 #undef FIELD
560 #define FIELD(type, name)						\
561 	sizeof(type) != sizeof(field.name) ? __bad_type_size() :	\
562 	#type, "common_" #name, offsetof(typeof(field), name),		\
563 		sizeof(field.name)
564 
565 static int trace_write_header(struct trace_seq *s)
566 {
567 	struct trace_entry field;
568 
569 	/* struct trace_entry */
570 	return trace_seq_printf(s,
571 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
572 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
573 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
574 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
575 				"\tfield:%s %s;\toffset:%zu;\tsize:%zu;\n"
576 				"\n",
577 				FIELD(unsigned short, type),
578 				FIELD(unsigned char, flags),
579 				FIELD(unsigned char, preempt_count),
580 				FIELD(int, pid),
581 				FIELD(int, tgid));
582 }
583 
584 static ssize_t
585 event_format_read(struct file *filp, char __user *ubuf, size_t cnt,
586 		  loff_t *ppos)
587 {
588 	struct ftrace_event_call *call = filp->private_data;
589 	struct trace_seq *s;
590 	char *buf;
591 	int r;
592 
593 	if (*ppos)
594 		return 0;
595 
596 	s = kmalloc(sizeof(*s), GFP_KERNEL);
597 	if (!s)
598 		return -ENOMEM;
599 
600 	trace_seq_init(s);
601 
602 	/* If any of the first writes fail, so will the show_format. */
603 
604 	trace_seq_printf(s, "name: %s\n", call->name);
605 	trace_seq_printf(s, "ID: %d\n", call->id);
606 	trace_seq_printf(s, "format:\n");
607 	trace_write_header(s);
608 
609 	r = call->show_format(call, s);
610 	if (!r) {
611 		/*
612 		 * ug!  The format output is bigger than a PAGE!!
613 		 */
614 		buf = "FORMAT TOO BIG\n";
615 		r = simple_read_from_buffer(ubuf, cnt, ppos,
616 					      buf, strlen(buf));
617 		goto out;
618 	}
619 
620 	r = simple_read_from_buffer(ubuf, cnt, ppos,
621 				    s->buffer, s->len);
622  out:
623 	kfree(s);
624 	return r;
625 }
626 
627 static ssize_t
628 event_id_read(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
629 {
630 	struct ftrace_event_call *call = filp->private_data;
631 	struct trace_seq *s;
632 	int r;
633 
634 	if (*ppos)
635 		return 0;
636 
637 	s = kmalloc(sizeof(*s), GFP_KERNEL);
638 	if (!s)
639 		return -ENOMEM;
640 
641 	trace_seq_init(s);
642 	trace_seq_printf(s, "%d\n", call->id);
643 
644 	r = simple_read_from_buffer(ubuf, cnt, ppos,
645 				    s->buffer, s->len);
646 	kfree(s);
647 	return r;
648 }
649 
650 static ssize_t
651 event_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
652 		  loff_t *ppos)
653 {
654 	struct ftrace_event_call *call = filp->private_data;
655 	struct trace_seq *s;
656 	int r;
657 
658 	if (*ppos)
659 		return 0;
660 
661 	s = kmalloc(sizeof(*s), GFP_KERNEL);
662 	if (!s)
663 		return -ENOMEM;
664 
665 	trace_seq_init(s);
666 
667 	print_event_filter(call, s);
668 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
669 
670 	kfree(s);
671 
672 	return r;
673 }
674 
675 static ssize_t
676 event_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
677 		   loff_t *ppos)
678 {
679 	struct ftrace_event_call *call = filp->private_data;
680 	char *buf;
681 	int err;
682 
683 	if (cnt >= PAGE_SIZE)
684 		return -EINVAL;
685 
686 	buf = (char *)__get_free_page(GFP_TEMPORARY);
687 	if (!buf)
688 		return -ENOMEM;
689 
690 	if (copy_from_user(buf, ubuf, cnt)) {
691 		free_page((unsigned long) buf);
692 		return -EFAULT;
693 	}
694 	buf[cnt] = '\0';
695 
696 	err = apply_event_filter(call, buf);
697 	free_page((unsigned long) buf);
698 	if (err < 0)
699 		return err;
700 
701 	*ppos += cnt;
702 
703 	return cnt;
704 }
705 
706 static ssize_t
707 subsystem_filter_read(struct file *filp, char __user *ubuf, size_t cnt,
708 		      loff_t *ppos)
709 {
710 	struct event_subsystem *system = filp->private_data;
711 	struct trace_seq *s;
712 	int r;
713 
714 	if (*ppos)
715 		return 0;
716 
717 	s = kmalloc(sizeof(*s), GFP_KERNEL);
718 	if (!s)
719 		return -ENOMEM;
720 
721 	trace_seq_init(s);
722 
723 	print_subsystem_event_filter(system, s);
724 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
725 
726 	kfree(s);
727 
728 	return r;
729 }
730 
731 static ssize_t
732 subsystem_filter_write(struct file *filp, const char __user *ubuf, size_t cnt,
733 		       loff_t *ppos)
734 {
735 	struct event_subsystem *system = filp->private_data;
736 	char *buf;
737 	int err;
738 
739 	if (cnt >= PAGE_SIZE)
740 		return -EINVAL;
741 
742 	buf = (char *)__get_free_page(GFP_TEMPORARY);
743 	if (!buf)
744 		return -ENOMEM;
745 
746 	if (copy_from_user(buf, ubuf, cnt)) {
747 		free_page((unsigned long) buf);
748 		return -EFAULT;
749 	}
750 	buf[cnt] = '\0';
751 
752 	err = apply_subsystem_event_filter(system, buf);
753 	free_page((unsigned long) buf);
754 	if (err < 0)
755 		return err;
756 
757 	*ppos += cnt;
758 
759 	return cnt;
760 }
761 
762 static ssize_t
763 show_header(struct file *filp, char __user *ubuf, size_t cnt, loff_t *ppos)
764 {
765 	int (*func)(struct trace_seq *s) = filp->private_data;
766 	struct trace_seq *s;
767 	int r;
768 
769 	if (*ppos)
770 		return 0;
771 
772 	s = kmalloc(sizeof(*s), GFP_KERNEL);
773 	if (!s)
774 		return -ENOMEM;
775 
776 	trace_seq_init(s);
777 
778 	func(s);
779 	r = simple_read_from_buffer(ubuf, cnt, ppos, s->buffer, s->len);
780 
781 	kfree(s);
782 
783 	return r;
784 }
785 
786 static const struct seq_operations show_event_seq_ops = {
787 	.start = t_start,
788 	.next = t_next,
789 	.show = t_show,
790 	.stop = t_stop,
791 };
792 
793 static const struct seq_operations show_set_event_seq_ops = {
794 	.start = s_start,
795 	.next = s_next,
796 	.show = t_show,
797 	.stop = t_stop,
798 };
799 
800 static const struct file_operations ftrace_avail_fops = {
801 	.open = ftrace_event_seq_open,
802 	.read = seq_read,
803 	.llseek = seq_lseek,
804 	.release = seq_release,
805 };
806 
807 static const struct file_operations ftrace_set_event_fops = {
808 	.open = ftrace_event_seq_open,
809 	.read = seq_read,
810 	.write = ftrace_event_write,
811 	.llseek = seq_lseek,
812 	.release = seq_release,
813 };
814 
815 static const struct file_operations ftrace_enable_fops = {
816 	.open = tracing_open_generic,
817 	.read = event_enable_read,
818 	.write = event_enable_write,
819 };
820 
821 static const struct file_operations ftrace_event_format_fops = {
822 	.open = tracing_open_generic,
823 	.read = event_format_read,
824 };
825 
826 static const struct file_operations ftrace_event_id_fops = {
827 	.open = tracing_open_generic,
828 	.read = event_id_read,
829 };
830 
831 static const struct file_operations ftrace_event_filter_fops = {
832 	.open = tracing_open_generic,
833 	.read = event_filter_read,
834 	.write = event_filter_write,
835 };
836 
837 static const struct file_operations ftrace_subsystem_filter_fops = {
838 	.open = tracing_open_generic,
839 	.read = subsystem_filter_read,
840 	.write = subsystem_filter_write,
841 };
842 
843 static const struct file_operations ftrace_system_enable_fops = {
844 	.open = tracing_open_generic,
845 	.read = system_enable_read,
846 	.write = system_enable_write,
847 };
848 
849 static const struct file_operations ftrace_show_header_fops = {
850 	.open = tracing_open_generic,
851 	.read = show_header,
852 };
853 
854 static struct dentry *event_trace_events_dir(void)
855 {
856 	static struct dentry *d_tracer;
857 	static struct dentry *d_events;
858 
859 	if (d_events)
860 		return d_events;
861 
862 	d_tracer = tracing_init_dentry();
863 	if (!d_tracer)
864 		return NULL;
865 
866 	d_events = debugfs_create_dir("events", d_tracer);
867 	if (!d_events)
868 		pr_warning("Could not create debugfs "
869 			   "'events' directory\n");
870 
871 	return d_events;
872 }
873 
874 static LIST_HEAD(event_subsystems);
875 
876 static struct dentry *
877 event_subsystem_dir(const char *name, struct dentry *d_events)
878 {
879 	struct event_subsystem *system;
880 	struct dentry *entry;
881 
882 	/* First see if we did not already create this dir */
883 	list_for_each_entry(system, &event_subsystems, list) {
884 		if (strcmp(system->name, name) == 0) {
885 			system->nr_events++;
886 			return system->entry;
887 		}
888 	}
889 
890 	/* need to create new entry */
891 	system = kmalloc(sizeof(*system), GFP_KERNEL);
892 	if (!system) {
893 		pr_warning("No memory to create event subsystem %s\n",
894 			   name);
895 		return d_events;
896 	}
897 
898 	system->entry = debugfs_create_dir(name, d_events);
899 	if (!system->entry) {
900 		pr_warning("Could not create event subsystem %s\n",
901 			   name);
902 		kfree(system);
903 		return d_events;
904 	}
905 
906 	system->nr_events = 1;
907 	system->name = kstrdup(name, GFP_KERNEL);
908 	if (!system->name) {
909 		debugfs_remove(system->entry);
910 		kfree(system);
911 		return d_events;
912 	}
913 
914 	list_add(&system->list, &event_subsystems);
915 
916 	system->filter = NULL;
917 
918 	system->filter = kzalloc(sizeof(struct event_filter), GFP_KERNEL);
919 	if (!system->filter) {
920 		pr_warning("Could not allocate filter for subsystem "
921 			   "'%s'\n", name);
922 		return system->entry;
923 	}
924 
925 	entry = debugfs_create_file("filter", 0644, system->entry, system,
926 				    &ftrace_subsystem_filter_fops);
927 	if (!entry) {
928 		kfree(system->filter);
929 		system->filter = NULL;
930 		pr_warning("Could not create debugfs "
931 			   "'%s/filter' entry\n", name);
932 	}
933 
934 	entry = trace_create_file("enable", 0644, system->entry,
935 				  (void *)system->name,
936 				  &ftrace_system_enable_fops);
937 
938 	return system->entry;
939 }
940 
941 static int
942 event_create_dir(struct ftrace_event_call *call, struct dentry *d_events,
943 		 const struct file_operations *id,
944 		 const struct file_operations *enable,
945 		 const struct file_operations *filter,
946 		 const struct file_operations *format)
947 {
948 	struct dentry *entry;
949 	int ret;
950 
951 	/*
952 	 * If the trace point header did not define TRACE_SYSTEM
953 	 * then the system would be called "TRACE_SYSTEM".
954 	 */
955 	if (strcmp(call->system, TRACE_SYSTEM) != 0)
956 		d_events = event_subsystem_dir(call->system, d_events);
957 
958 	call->dir = debugfs_create_dir(call->name, d_events);
959 	if (!call->dir) {
960 		pr_warning("Could not create debugfs "
961 			   "'%s' directory\n", call->name);
962 		return -1;
963 	}
964 
965 	if (call->regfunc)
966 		entry = trace_create_file("enable", 0644, call->dir, call,
967 					  enable);
968 
969 	if (call->id && call->profile_enable)
970 		entry = trace_create_file("id", 0444, call->dir, call,
971 					  id);
972 
973 	if (call->define_fields) {
974 		ret = call->define_fields(call);
975 		if (ret < 0) {
976 			pr_warning("Could not initialize trace point"
977 				   " events/%s\n", call->name);
978 			return ret;
979 		}
980 		entry = trace_create_file("filter", 0644, call->dir, call,
981 					  filter);
982 	}
983 
984 	/* A trace may not want to export its format */
985 	if (!call->show_format)
986 		return 0;
987 
988 	entry = trace_create_file("format", 0444, call->dir, call,
989 				  format);
990 
991 	return 0;
992 }
993 
/*
 * Walk @event over the array [@start, @end).
 *
 * Every argument is parenthesized so that expressions such as
 * "base + count" can be passed as @end: without the parentheses the
 * cast to unsigned long would bind to "base" only and the bound would
 * be off.
 */
#define for_each_event(event, start, end)			\
	for ((event) = (start);					\
	     (unsigned long)(event) < (unsigned long)(end);	\
	     (event)++)
998 
999 #ifdef CONFIG_MODULES
1000 
1001 static LIST_HEAD(ftrace_module_file_list);
1002 
1003 /*
1004  * Modules must own their file_operations to keep up with
1005  * reference counting.
1006  */
1007 struct ftrace_module_file_ops {
1008 	struct list_head		list;
1009 	struct module			*mod;
1010 	struct file_operations		id;
1011 	struct file_operations		enable;
1012 	struct file_operations		format;
1013 	struct file_operations		filter;
1014 };
1015 
1016 static void remove_subsystem_dir(const char *name)
1017 {
1018 	struct event_subsystem *system;
1019 
1020 	if (strcmp(name, TRACE_SYSTEM) == 0)
1021 		return;
1022 
1023 	list_for_each_entry(system, &event_subsystems, list) {
1024 		if (strcmp(system->name, name) == 0) {
1025 			if (!--system->nr_events) {
1026 				struct event_filter *filter = system->filter;
1027 
1028 				debugfs_remove_recursive(system->entry);
1029 				list_del(&system->list);
1030 				if (filter) {
1031 					kfree(filter->filter_string);
1032 					kfree(filter);
1033 				}
1034 				kfree(system->name);
1035 				kfree(system);
1036 			}
1037 			break;
1038 		}
1039 	}
1040 }
1041 
1042 static struct ftrace_module_file_ops *
1043 trace_create_file_ops(struct module *mod)
1044 {
1045 	struct ftrace_module_file_ops *file_ops;
1046 
1047 	/*
1048 	 * This is a bit of a PITA. To allow for correct reference
1049 	 * counting, modules must "own" their file_operations.
1050 	 * To do this, we allocate the file operations that will be
1051 	 * used in the event directory.
1052 	 */
1053 
1054 	file_ops = kmalloc(sizeof(*file_ops), GFP_KERNEL);
1055 	if (!file_ops)
1056 		return NULL;
1057 
1058 	file_ops->mod = mod;
1059 
1060 	file_ops->id = ftrace_event_id_fops;
1061 	file_ops->id.owner = mod;
1062 
1063 	file_ops->enable = ftrace_enable_fops;
1064 	file_ops->enable.owner = mod;
1065 
1066 	file_ops->filter = ftrace_event_filter_fops;
1067 	file_ops->filter.owner = mod;
1068 
1069 	file_ops->format = ftrace_event_format_fops;
1070 	file_ops->format.owner = mod;
1071 
1072 	list_add(&file_ops->list, &ftrace_module_file_list);
1073 
1074 	return file_ops;
1075 }
1076 
1077 static void trace_module_add_events(struct module *mod)
1078 {
1079 	struct ftrace_module_file_ops *file_ops = NULL;
1080 	struct ftrace_event_call *call, *start, *end;
1081 	struct dentry *d_events;
1082 	int ret;
1083 
1084 	start = mod->trace_events;
1085 	end = mod->trace_events + mod->num_trace_events;
1086 
1087 	if (start == end)
1088 		return;
1089 
1090 	d_events = event_trace_events_dir();
1091 	if (!d_events)
1092 		return;
1093 
1094 	for_each_event(call, start, end) {
1095 		/* The linker may leave blanks */
1096 		if (!call->name)
1097 			continue;
1098 		if (call->raw_init) {
1099 			ret = call->raw_init();
1100 			if (ret < 0) {
1101 				if (ret != -ENOSYS)
1102 					pr_warning("Could not initialize trace "
1103 					"point events/%s\n", call->name);
1104 				continue;
1105 			}
1106 		}
1107 		/*
1108 		 * This module has events, create file ops for this module
1109 		 * if not already done.
1110 		 */
1111 		if (!file_ops) {
1112 			file_ops = trace_create_file_ops(mod);
1113 			if (!file_ops)
1114 				return;
1115 		}
1116 		call->mod = mod;
1117 		list_add(&call->list, &ftrace_events);
1118 		event_create_dir(call, d_events,
1119 				 &file_ops->id, &file_ops->enable,
1120 				 &file_ops->filter, &file_ops->format);
1121 	}
1122 }
1123 
1124 static void trace_module_remove_events(struct module *mod)
1125 {
1126 	struct ftrace_module_file_ops *file_ops;
1127 	struct ftrace_event_call *call, *p;
1128 	bool found = false;
1129 
1130 	down_write(&trace_event_mutex);
1131 	list_for_each_entry_safe(call, p, &ftrace_events, list) {
1132 		if (call->mod == mod) {
1133 			found = true;
1134 			ftrace_event_enable_disable(call, 0);
1135 			if (call->event)
1136 				__unregister_ftrace_event(call->event);
1137 			debugfs_remove_recursive(call->dir);
1138 			list_del(&call->list);
1139 			trace_destroy_fields(call);
1140 			destroy_preds(call);
1141 			remove_subsystem_dir(call->system);
1142 		}
1143 	}
1144 
1145 	/* Now free the file_operations */
1146 	list_for_each_entry(file_ops, &ftrace_module_file_list, list) {
1147 		if (file_ops->mod == mod)
1148 			break;
1149 	}
1150 	if (&file_ops->list != &ftrace_module_file_list) {
1151 		list_del(&file_ops->list);
1152 		kfree(file_ops);
1153 	}
1154 
1155 	/*
1156 	 * It is safest to reset the ring buffer if the module being unloaded
1157 	 * registered any events.
1158 	 */
1159 	if (found)
1160 		tracing_reset_current_online_cpus();
1161 	up_write(&trace_event_mutex);
1162 }
1163 
1164 static int trace_module_notify(struct notifier_block *self,
1165 			       unsigned long val, void *data)
1166 {
1167 	struct module *mod = data;
1168 
1169 	mutex_lock(&event_mutex);
1170 	switch (val) {
1171 	case MODULE_STATE_COMING:
1172 		trace_module_add_events(mod);
1173 		break;
1174 	case MODULE_STATE_GOING:
1175 		trace_module_remove_events(mod);
1176 		break;
1177 	}
1178 	mutex_unlock(&event_mutex);
1179 
1180 	return 0;
1181 }
1182 #else
/* Without CONFIG_MODULES there is nothing to do; always returns 0. */
static int trace_module_notify(struct notifier_block *self,
			       unsigned long val, void *data)
{
	return 0;
}
1188 #endif /* CONFIG_MODULES */
1189 
1190 struct notifier_block trace_module_nb = {
1191 	.notifier_call = trace_module_notify,
1192 	.priority = 0,
1193 };
1194 
1195 extern struct ftrace_event_call __start_ftrace_events[];
1196 extern struct ftrace_event_call __stop_ftrace_events[];
1197 
1198 static char bootup_event_buf[COMMAND_LINE_SIZE] __initdata;
1199 
1200 static __init int setup_trace_event(char *str)
1201 {
1202 	strlcpy(bootup_event_buf, str, COMMAND_LINE_SIZE);
1203 	ring_buffer_expanded = 1;
1204 	tracing_selftest_disabled = 1;
1205 
1206 	return 1;
1207 }
1208 __setup("trace_event=", setup_trace_event);
1209 
1210 static __init int event_trace_init(void)
1211 {
1212 	struct ftrace_event_call *call;
1213 	struct dentry *d_tracer;
1214 	struct dentry *entry;
1215 	struct dentry *d_events;
1216 	int ret;
1217 	char *buf = bootup_event_buf;
1218 	char *token;
1219 
1220 	d_tracer = tracing_init_dentry();
1221 	if (!d_tracer)
1222 		return 0;
1223 
1224 	entry = debugfs_create_file("available_events", 0444, d_tracer,
1225 				    (void *)&show_event_seq_ops,
1226 				    &ftrace_avail_fops);
1227 	if (!entry)
1228 		pr_warning("Could not create debugfs "
1229 			   "'available_events' entry\n");
1230 
1231 	entry = debugfs_create_file("set_event", 0644, d_tracer,
1232 				    (void *)&show_set_event_seq_ops,
1233 				    &ftrace_set_event_fops);
1234 	if (!entry)
1235 		pr_warning("Could not create debugfs "
1236 			   "'set_event' entry\n");
1237 
1238 	d_events = event_trace_events_dir();
1239 	if (!d_events)
1240 		return 0;
1241 
1242 	/* ring buffer internal formats */
1243 	trace_create_file("header_page", 0444, d_events,
1244 			  ring_buffer_print_page_header,
1245 			  &ftrace_show_header_fops);
1246 
1247 	trace_create_file("header_event", 0444, d_events,
1248 			  ring_buffer_print_entry_header,
1249 			  &ftrace_show_header_fops);
1250 
1251 	trace_create_file("enable", 0644, d_events,
1252 			  NULL, &ftrace_system_enable_fops);
1253 
1254 	for_each_event(call, __start_ftrace_events, __stop_ftrace_events) {
1255 		/* The linker may leave blanks */
1256 		if (!call->name)
1257 			continue;
1258 		if (call->raw_init) {
1259 			ret = call->raw_init();
1260 			if (ret < 0) {
1261 				if (ret != -ENOSYS)
1262 					pr_warning("Could not initialize trace "
1263 					"point events/%s\n", call->name);
1264 				continue;
1265 			}
1266 		}
1267 		list_add(&call->list, &ftrace_events);
1268 		event_create_dir(call, d_events, &ftrace_event_id_fops,
1269 				 &ftrace_enable_fops, &ftrace_event_filter_fops,
1270 				 &ftrace_event_format_fops);
1271 	}
1272 
1273 	while (true) {
1274 		token = strsep(&buf, ",");
1275 
1276 		if (!token)
1277 			break;
1278 		if (!*token)
1279 			continue;
1280 
1281 		ret = ftrace_set_clr_event(token, 1);
1282 		if (ret)
1283 			pr_warning("Failed to enable trace event: %s\n", token);
1284 	}
1285 
1286 	ret = register_module_notifier(&trace_module_nb);
1287 	if (ret)
1288 		pr_warning("Failed to register trace events module notifier\n");
1289 
1290 	return 0;
1291 }
1292 fs_initcall(event_trace_init);
1293 
1294 #ifdef CONFIG_FTRACE_STARTUP_TEST
1295 
/*
 * Locks that exist only for the self test: test_work() acquires and
 * releases each of them while the events under test are enabled.
 */
static DEFINE_MUTEX(test_mutex);
static DEFINE_SPINLOCK(test_spinlock_irq);
static DEFINE_SPINLOCK(test_spinlock);
1299 
1300 static __init void test_work(struct work_struct *dummy)
1301 {
1302 	spin_lock(&test_spinlock);
1303 	spin_lock_irq(&test_spinlock_irq);
1304 	udelay(1);
1305 	spin_unlock_irq(&test_spinlock_irq);
1306 	spin_unlock(&test_spinlock);
1307 
1308 	mutex_lock(&test_mutex);
1309 	msleep(1);
1310 	mutex_unlock(&test_mutex);
1311 }
1312 
1313 static __init int event_test_thread(void *unused)
1314 {
1315 	void *test_malloc;
1316 
1317 	test_malloc = kmalloc(1234, GFP_KERNEL);
1318 	if (!test_malloc)
1319 		pr_info("failed to kmalloc\n");
1320 
1321 	schedule_on_each_cpu(test_work);
1322 
1323 	kfree(test_malloc);
1324 
1325 	set_current_state(TASK_INTERRUPTIBLE);
1326 	while (!kthread_should_stop())
1327 		schedule();
1328 
1329 	return 0;
1330 }
1331 
1332 /*
1333  * Do various things that may trigger events.
1334  */
1335 static __init void event_test_stuff(void)
1336 {
1337 	struct task_struct *test_thread;
1338 
1339 	test_thread = kthread_run(event_test_thread, NULL, "test-events");
1340 	msleep(1);
1341 	kthread_stop(test_thread);
1342 }
1343 
1344 /*
1345  * For every trace event defined, we will test each trace point separately,
1346  * and then by groups, and finally all trace points.
1347  */
1348 static __init void event_trace_self_tests(void)
1349 {
1350 	struct ftrace_event_call *call;
1351 	struct event_subsystem *system;
1352 	int ret;
1353 
1354 	pr_info("Running tests on trace events:\n");
1355 
1356 	list_for_each_entry(call, &ftrace_events, list) {
1357 
1358 		/* Only test those that have a regfunc */
1359 		if (!call->regfunc)
1360 			continue;
1361 
1362 		pr_info("Testing event %s: ", call->name);
1363 
1364 		/*
1365 		 * If an event is already enabled, someone is using
1366 		 * it and the self test should not be on.
1367 		 */
1368 		if (call->enabled) {
1369 			pr_warning("Enabled event during self test!\n");
1370 			WARN_ON_ONCE(1);
1371 			continue;
1372 		}
1373 
1374 		ftrace_event_enable_disable(call, 1);
1375 		event_test_stuff();
1376 		ftrace_event_enable_disable(call, 0);
1377 
1378 		pr_cont("OK\n");
1379 	}
1380 
1381 	/* Now test at the sub system level */
1382 
1383 	pr_info("Running tests on trace event systems:\n");
1384 
1385 	list_for_each_entry(system, &event_subsystems, list) {
1386 
1387 		/* the ftrace system is special, skip it */
1388 		if (strcmp(system->name, "ftrace") == 0)
1389 			continue;
1390 
1391 		pr_info("Testing event system %s: ", system->name);
1392 
1393 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 1);
1394 		if (WARN_ON_ONCE(ret)) {
1395 			pr_warning("error enabling system %s\n",
1396 				   system->name);
1397 			continue;
1398 		}
1399 
1400 		event_test_stuff();
1401 
1402 		ret = __ftrace_set_clr_event(NULL, system->name, NULL, 0);
1403 		if (WARN_ON_ONCE(ret))
1404 			pr_warning("error disabling system %s\n",
1405 				   system->name);
1406 
1407 		pr_cont("OK\n");
1408 	}
1409 
1410 	/* Test with all events enabled */
1411 
1412 	pr_info("Running tests on all trace events:\n");
1413 	pr_info("Testing all events: ");
1414 
1415 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 1);
1416 	if (WARN_ON_ONCE(ret)) {
1417 		pr_warning("error enabling all events\n");
1418 		return;
1419 	}
1420 
1421 	event_test_stuff();
1422 
1423 	/* reset sysname */
1424 	ret = __ftrace_set_clr_event(NULL, NULL, NULL, 0);
1425 	if (WARN_ON_ONCE(ret)) {
1426 		pr_warning("error disabling all events\n");
1427 		return;
1428 	}
1429 
1430 	pr_cont("OK\n");
1431 }
1432 
1433 #ifdef CONFIG_FUNCTION_TRACER
1434 
1435 static DEFINE_PER_CPU(atomic_t, ftrace_test_event_disable);
1436 
1437 static void
1438 function_test_events_call(unsigned long ip, unsigned long parent_ip)
1439 {
1440 	struct ring_buffer_event *event;
1441 	struct ring_buffer *buffer;
1442 	struct ftrace_entry *entry;
1443 	unsigned long flags;
1444 	long disabled;
1445 	int resched;
1446 	int cpu;
1447 	int pc;
1448 
1449 	pc = preempt_count();
1450 	resched = ftrace_preempt_disable();
1451 	cpu = raw_smp_processor_id();
1452 	disabled = atomic_inc_return(&per_cpu(ftrace_test_event_disable, cpu));
1453 
1454 	if (disabled != 1)
1455 		goto out;
1456 
1457 	local_save_flags(flags);
1458 
1459 	event = trace_current_buffer_lock_reserve(&buffer,
1460 						  TRACE_FN, sizeof(*entry),
1461 						  flags, pc);
1462 	if (!event)
1463 		goto out;
1464 	entry	= ring_buffer_event_data(event);
1465 	entry->ip			= ip;
1466 	entry->parent_ip		= parent_ip;
1467 
1468 	trace_nowake_buffer_unlock_commit(buffer, event, flags, pc);
1469 
1470  out:
1471 	atomic_dec(&per_cpu(ftrace_test_event_disable, cpu));
1472 	ftrace_preempt_enable(resched);
1473 }
1474 
1475 static struct ftrace_ops trace_ops __initdata  =
1476 {
1477 	.func = function_test_events_call,
1478 };
1479 
1480 static __init void event_trace_self_test_with_function(void)
1481 {
1482 	register_ftrace_function(&trace_ops);
1483 	pr_info("Running tests again, along with the function tracer\n");
1484 	event_trace_self_tests();
1485 	unregister_ftrace_function(&trace_ops);
1486 }
1487 #else
1488 static __init void event_trace_self_test_with_function(void)
1489 {
1490 }
1491 #endif
1492 
1493 static __init int event_trace_self_tests_init(void)
1494 {
1495 	if (!tracing_selftest_disabled) {
1496 		event_trace_self_tests();
1497 		event_trace_self_test_with_function();
1498 	}
1499 
1500 	return 0;
1501 }
1502 
1503 late_initcall(event_trace_self_tests_init);
1504 
1505 #endif
1506